dirstate-v2: Remove the `.d` suffix in data file names...
Simon Sapin
r48780:681851d6 default
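The change itself is one line in each of the two implementations below: the docket now names its data file after the UUID alone, without the `.d` extension. A minimal sketch of the effect (illustrative only; the UUID is a made-up value standing in for a real `make_uid()` result):

    # How DirstateDocket.data_filename() derives the data file name.
    uuid = b"a1b2c3d4"                   # hypothetical UUID bytes
    old_name = b'dirstate.%s.d' % uuid   # before: b'dirstate.a1b2c3d4.d'
    new_name = b'dirstate.%s' % uuid     # after:  b'dirstate.a1b2c3d4'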
mercurial/dirstateutils/docket.py
@@ -1,75 +1,75 @@
# dirstatedocket.py - docket file for dirstate-v2
#
# Copyright Mercurial Contributors
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct

from ..revlogutils import docket as docket_mod


V2_FORMAT_MARKER = b"dirstate-v2\n"

# Must match the constant of the same name in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`
TREE_METADATA_SIZE = 44

# * 12 bytes: format marker
# * 32 bytes: node ID of the working directory's first parent
# * 32 bytes: node ID of the working directory's second parent
# * 4 bytes: big-endian used size of the data file
# * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
# * 1 byte: length of the data file's UUID
# * variable: data file's UUID
#
# Node IDs are null-padded if shorter than 32 bytes.
# A data file shorter than the specified used size is corrupted (truncated)
HEADER = struct.Struct(
    ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
)


class DirstateDocket(object):
-    data_filename_pattern = b'dirstate.%s.d'
+    data_filename_pattern = b'dirstate.%s'

    def __init__(self, parents, data_size, tree_metadata, uuid):
        self.parents = parents
        self.data_size = data_size
        self.tree_metadata = tree_metadata
        self.uuid = uuid

    @classmethod
    def with_new_uuid(cls, parents, data_size, tree_metadata):
        return cls(parents, data_size, tree_metadata, docket_mod.make_uid())

    @classmethod
    def parse(cls, data, nodeconstants):
        if not data:
            parents = (nodeconstants.nullid, nodeconstants.nullid)
            return cls(parents, 0, b'', None)
        marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(data)
        if marker != V2_FORMAT_MARKER:
            raise ValueError("expected dirstate-v2 marker")
        uuid = data[HEADER.size : HEADER.size + uuid_size]
        p1 = p1[: nodeconstants.nodelen]
        p2 = p2[: nodeconstants.nodelen]
        return cls((p1, p2), data_size, meta, uuid)

    def serialize(self):
        p1, p2 = self.parents
        header = HEADER.pack(
            V2_FORMAT_MARKER,
            p1,
            p2,
            self.data_size,
            self.tree_metadata,
            len(self.uuid),
        )
        return header + self.uuid

    def data_filename(self):
        return self.data_filename_pattern % self.uuid
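For reference (not part of the commit), a self-contained sketch of how the `HEADER` struct above lays out the docket bytes; every value here is made up for illustration, and the UUID is fixed instead of coming from `docket_mod.make_uid()`:

    import struct

    V2_FORMAT_MARKER = b"dirstate-v2\n"
    TREE_METADATA_SIZE = 44
    HEADER = struct.Struct(
        ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
    )

    uuid = b"0f3a9b8c"  # hypothetical; normally generated by make_uid()
    docket = HEADER.pack(
        V2_FORMAT_MARKER,
        b"\x11" * 20 + b"\x00" * 12,   # p1: 20-byte SHA-1, null-padded to 32
        b"\x00" * 32,                  # p2: null node ID
        1024,                          # used size of the data file, in bytes
        b"\x00" * TREE_METADATA_SIZE,  # tree metadata, parsed separately
        len(uuid),
    ) + uuid

    marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(docket)
    assert marker == V2_FORMAT_MARKER
    assert docket[HEADER.size : HEADER.size + uuid_size] == uuid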
rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -1,760 +1,760 @@
//! The "version 2" disk representation of the dirstate
//!
//! # File format
//!
//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
//! with a fixed-size header whose layout is defined by the `DocketHeader`
//! struct, followed by the data file identifier.
//!
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
//! file may be longer than the size given in the docket, but not shorter. Only
//! the start of the data file up to the given size is considered. The
//! fixed-size "root" of the dirstate tree whose layout is defined by the
//! `Root` struct is found at the end of that slice of data.
//!
//! Its `root_nodes` field contains the slice (offset and length) to
//! the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own child
//! nodes (if any) for nested files and directories.

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
use crate::EntryState;
use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match the constant of the same name in
/// `mercurial/dirstateutils/docket.py`
const TREE_METADATA_SIZE: usize = 44;

/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; 43]>;
}

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Counted in bytes
    data_size: Size,

    metadata: TreeMetadata,

    uuid_size: u8,
}

pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    uuid: &'on_disk [u8],
}

#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,

    /// How many bytes of this data file are not used anymore
    unreachable_bytes: Size,

    /// Current version always sets these bytes to zero when creating or
    /// updating a dirstate. Future versions could assign some bits to signal
    /// for example "the version that last wrote/updated this dirstate did so
    /// in such and such way that can be relied on by versions that know to."
    unused: [u8; 4],

    /// If non-zero, a hash of ignore files that were used for some previous
    /// run of the `status` algorithm.
    ///
    /// We define:
    ///
    /// * "Root" ignore files are `.hgignore` at the root of the repository if
    ///   it exists, and files from `ui.ignore.*` config. This set of files is
    ///   then sorted by the string representation of their path.
    /// * The "expanded contents" of an ignore file is the byte string made
    ///   by concatenating its contents with the "expanded contents" of other
    ///   files included with `include:` or `subinclude:` directives, in
    ///   inclusion order. This definition is recursive, as included files can
    ///   themselves include more files.
    ///
    /// This hash is defined as the SHA-1 of the concatenation (in sorted
    /// order) of the "expanded contents" of each "root" ignore file.
    /// (Note that computing this does not require actually concatenating byte
    /// strings into contiguous memory, instead SHA-1 hashing can be done
    /// incrementally.)
    ignore_patterns_hash: IgnorePatternsHash,
}

#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,

    /// Depending on the value of `state`:
    ///
    /// * A null byte: `data` is not used.
    ///
    /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
    ///   represent a dirstate entry like in the v1 format.
    ///
    /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
    ///   as the `Timestamp` for the mtime of a cached directory.
    ///
    ///   The presence of this state means that at some point, this path in
    ///   the working directory was observed:
    ///
    ///   - To be a directory
    ///   - With the modification time as given by `Timestamp`
    ///   - That timestamp was already strictly in the past when observed,
    ///     meaning that later changes cannot happen in the same clock tick
    ///     and must cause a different modification time (unless the system
    ///     clock jumps back and we get unlucky, which is not impossible but
    ///     deemed unlikely enough).
    ///   - All direct children of this directory (as returned by
    ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
    ///     are ignored by ignore patterns whose hash is in
    ///     `TreeMetadata::ignore_patterns_hash`.
    ///
    ///   This means that if `std::fs::symlink_metadata` later reports the
    ///   same modification time and ignored patterns haven’t changed, a run
    ///   of status that is not listing ignored files can skip calling
    ///   `std::fs::read_dir` again for this directory, and iterate child
    ///   dirstate nodes instead.
    state: u8,
    data: Entry,
}

#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct Entry {
    mode: I32Be,
    mtime: I32Be,
    size: I32Be,
}

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, PartialEq)]
#[repr(C)]
pub(super) struct Timestamp {
    seconds: I64Be,

    /// In `0 .. 1_000_000_000`.
    ///
    /// This timestamp is later or earlier than `(seconds, 0)` by this many
    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

impl From<DirstateV2ParseError> for HgError {
    fn from(_: DirstateV2ParseError) -> Self {
        HgError::corrupted("dirstate-v2 parse error")
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl<'on_disk> Docket<'on_disk> {
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
-        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
+        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}

pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) =
        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError)
    }
}

pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        return Ok(DirstateMap::empty(on_disk));
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(read_nodes(
            on_disk,
            meta.root_nodes,
        )?),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u32 -> usize, could only panic on a 16-bit CPU
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError)
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        let entry = |state| {
            dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
        };

        match self.state {
            b'\0' => Ok(dirstate_map::NodeData::None),
            b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
                mtime: *self.data.as_timestamp(),
            }),
            b'n' => Ok(entry(EntryState::Normal)),
            b'a' => Ok(entry(EntryState::Added)),
            b'r' => Ok(entry(EntryState::Removed)),
            b'm' => Ok(entry(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
        if self.state == b'd' {
            Some(self.data.as_timestamp())
        } else {
            None
        }
    }

    pub(super) fn state(
        &self,
    ) -> Result<Option<EntryState>, DirstateV2ParseError> {
        match self.state {
            b'\0' | b'd' => Ok(None),
            b'n' => Ok(Some(EntryState::Normal)),
            b'a' => Ok(Some(EntryState::Added)),
            b'r' => Ok(Some(EntryState::Removed)),
            b'm' => Ok(Some(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
        DirstateEntry {
            state,
            mode: self.data.mode.get(),
            mtime: self.data.mtime.get(),
            size: self.data.size.get(),
        }
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        Ok(self
            .state()?
            .map(|state| self.entry_with_given_state(state)))
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }
}

impl Entry {
    fn from_timestamp(timestamp: Timestamp) -> Self {
        // Safety: both types implement the `BytesCast` trait, so we could
        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
        // `transmute` instead makes the compiler check that the two types
        // have the same size, which eliminates the error case of
        // `from_bytes`.
        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
    }

    fn as_timestamp(&self) -> &Timestamp {
        // Safety: same as above in `from_timestamp`
        unsafe { &*(self as *const Entry as *const Timestamp) }
    }
}

impl Timestamp {
    pub fn seconds(&self) -> i64 {
        self.seconds.get()
    }
}

impl From<SystemTime> for Timestamp {
    fn from(system_time: SystemTime) -> Self {
        let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
            Ok(duration) => {
                (duration.as_secs() as i64, duration.subsec_nanos())
            }
            Err(error) => {
                let negative = error.duration();
                (-(negative.as_secs() as i64), negative.subsec_nanos())
            }
        };
        Timestamp {
            seconds: secs.into(),
            nanoseconds: nanos.into(),
        }
    }
}

impl From<&'_ Timestamp> for SystemTime {
    fn from(timestamp: &'_ Timestamp) -> Self {
        let secs = timestamp.seconds.get();
        let nanos = timestamp.nanoseconds.get();
        if secs >= 0 {
            UNIX_EPOCH + Duration::new(secs as u64, nanos)
        } else {
            UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
        }
    }
}

fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either `usize::MAX` would result in an "out of bounds" error, since a
    // single `&[u8]` cannot occupy the entire address space.
    let start = start.get().try_into().unwrap_or(std::usize::MAX);
    let len = len.try_into().unwrap_or(std::usize::MAX);
    on_disk
        .get(start..)
        .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
        .map(|(slice, _rest)| slice)
        .ok_or_else(|| DirstateV2ParseError)
}

pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(state) = node.state()? {
                if state.is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}

/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: dirstate_map.unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta.as_bytes().to_vec(), append))
}

struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (state, data) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => (
                            entry.state.into(),
                            Entry {
                                mode: entry.mode.into(),
                                mtime: entry.mtime.into(),
                                size: entry.size.into(),
                            },
                        ),
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (b'd', Entry::from_timestamp(*mtime))
                        }
                        dirstate_map::NodeData::None => (
                            b'\0',
                            Entry {
                                mode: 0.into(),
                                mtime: 0.into(),
                                size: 0.into(),
                            },
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        state,
                        data,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}
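A note on the `Timestamp` sign convention above, since it is easy to invert: per the struct's documentation and the `From<&'_ Timestamp> for SystemTime` impl, the nanoseconds always move the instant away from `(seconds, 0)`, towards the future when `seconds` is non-negative and towards the past otherwise. A hedged Python rendering of the same rule, for illustration only (not a Mercurial API):

    def timestamp_to_epoch_seconds(seconds, nanoseconds):
        # Mirrors the Rust SystemTime conversion; requires
        # 0 <= nanoseconds < 1_000_000_000.
        fraction = nanoseconds / 1_000_000_000
        return seconds + fraction if seconds >= 0 else seconds - fraction

    assert timestamp_to_epoch_seconds(2, 500_000_000) == 2.5    # after the epoch
    assert timestamp_to_epoch_seconds(-2, 500_000_000) == -2.5  # before the epoch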