upstream/mercurial-mirror Commit - r48977:d467e44f

1

//! The "version 2" disk representation of the dirstate

1

//! The "version 2" disk representation of the dirstate

2

//!

2

//!

3

//! # File format

3

//! # File format

4

//!

4

//!

5

//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts

5

//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts

6

//! with a fixed-sized header whose layout is defined by the `DocketHeader`

6

//! with a fixed-sized header whose layout is defined by the `DocketHeader`

7

//! struct, followed by the data file identifier.

7

//! struct, followed by the data file identifier.

8

//!

8

//!

9

//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That

9

//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That

10

//! file may be longer than the size given in the docket, but not shorter. Only

10

//! file may be longer than the size given in the docket, but not shorter. Only

11

//! the start of the data file up to the given size is considered. The

11

//! the start of the data file up to the given size is considered. The

12

//! fixed-size "root" of the dirstate tree whose layout is defined by the

12

//! fixed-size "root" of the dirstate tree whose layout is defined by the

13

//! `Root` struct is found at the end of that slice of data.

13

//! `Root` struct is found at the end of that slice of data.

14

//!

14

//!

15

//! Its `root_nodes` field contains the slice (offset and length) to

15

//! Its `root_nodes` field contains the slice (offset and length) to

16

//! the nodes representing the files and directories at the root of the

16

//! the nodes representing the files and directories at the root of the

17

//! repository. Each node is also fixed-size, defined by the `Node` struct.

17

//! repository. Each node is also fixed-size, defined by the `Node` struct.

18

//! Nodes in turn contain slices to variable-size paths, and to their own child

18

//! Nodes in turn contain slices to variable-size paths, and to their own child

19

//! nodes (if any) for nested files and directories.

19

//! nodes (if any) for nested files and directories.

20

21

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};

21

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};

22

use crate::dirstate_tree::path_with_basename::WithBasename;

22

use crate::dirstate_tree::path_with_basename::WithBasename;

23

use crate::errors::HgError;

23

use crate::errors::HgError;

24

use crate::utils::hg_path::HgPath;

24

use crate::utils::hg_path::HgPath;

25

use crate::DirstateEntry;

25

use crate::DirstateEntry;

26

use crate::DirstateError;

26

use crate::DirstateError;

27

use crate::DirstateParents;

27

use crate::DirstateParents;

28

use bitflags::bitflags;

28

use bitflags::bitflags;

29

use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};

29

use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};

30

use bytes_cast::BytesCast;

30

use bytes_cast::BytesCast;

31

use format_bytes::format_bytes;

31

use format_bytes::format_bytes;

32

use std::borrow::Cow;

32

use std::borrow::Cow;

33

use std::convert::{TryFrom, TryInto};

33

use std::convert::{TryFrom, TryInto};

34

use std::time::{Duration, SystemTime, UNIX_EPOCH};

34

use std::time::{Duration, SystemTime, UNIX_EPOCH};

35

36

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

40

41

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

46

47

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;

47

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;

48

pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

48

pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

49

50

/// Must match the constant of the same name in
/// `mercurial/dirstateutils/docket.py`
const TREE_METADATA_SIZE: usize = 44;

53

54

/// Make sure that size-affecting changes are made knowingly

54

/// Make sure that size-affecting changes are made knowingly

55

#[allow(unused)]

55

#[allow(unused)]

56

fn static_assert_size_of() {

56

fn static_assert_size_of() {

57

let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;

57

let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;

58

let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;

58

let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;

59

let _ = std::mem::transmute::<Node, [u8; 43]>;

59

let _ = std::mem::transmute::<Node, [u8; 43]>;

60

}

60

}

61

62

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`

62

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`

63

#[derive(BytesCast)]

63

#[derive(BytesCast)]

64

#[repr(C)]

64

#[repr(C)]

65

struct DocketHeader {

65

struct DocketHeader {

66

marker: [u8; V2_FORMAT_MARKER.len()],

66

marker: [u8; V2_FORMAT_MARKER.len()],

67

parent_1: [u8; STORED_NODE_ID_BYTES],

67

parent_1: [u8; STORED_NODE_ID_BYTES],

68

parent_2: [u8; STORED_NODE_ID_BYTES],

68

parent_2: [u8; STORED_NODE_ID_BYTES],

69

70

metadata: TreeMetadata,

71

70

/// Counted in bytes

72

/// Counted in bytes

71

data_size: Size,

73

data_size: Size,

72

74

73

metadata: TreeMetadata,

74

75

uuid_size: u8,

75

uuid_size: u8,

76

}

76

}

77

78

pub struct Docket<'on_disk> {

78

pub struct Docket<'on_disk> {

79

header: &'on_disk DocketHeader,

79

header: &'on_disk DocketHeader,

80

uuid: &'on_disk [u8],

80

uuid: &'on_disk [u8],

81

}

81

}

82

83

#[derive(BytesCast)]

83

#[derive(BytesCast)]

84

#[repr(C)]

84

#[repr(C)]

85

struct TreeMetadata {

85

struct TreeMetadata {

86

root_nodes: ChildNodes,

86

root_nodes: ChildNodes,

87

nodes_with_entry_count: Size,

87

nodes_with_entry_count: Size,

88

nodes_with_copy_source_count: Size,

88

nodes_with_copy_source_count: Size,

89

90

/// How many bytes of this data file are not used anymore

90

/// How many bytes of this data file are not used anymore

91

unreachable_bytes: Size,

91

unreachable_bytes: Size,

92

93

/// Current version always sets these bytes to zero when creating or

93

/// Current version always sets these bytes to zero when creating or

94

/// updating a dirstate. Future versions could assign some bits to signal

94

/// updating a dirstate. Future versions could assign some bits to signal

95

/// for example "the version that last wrote/updated this dirstate did so

95

/// for example "the version that last wrote/updated this dirstate did so

96

/// in such and such way that can be relied on by versions that know to."

96

/// in such and such way that can be relied on by versions that know to."

97

unused: [u8; 4],

97

unused: [u8; 4],

98

99

/// If non-zero, a hash of ignore files that were used for some previous

99

/// If non-zero, a hash of ignore files that were used for some previous

100

/// run of the `status` algorithm.

100

/// run of the `status` algorithm.

101

///

101

///

102

/// We define:

102

/// We define:

103

///

103

///

104

/// * "Root" ignore files are `.hgignore` at the root of the repository if

104

/// * "Root" ignore files are `.hgignore` at the root of the repository if

105

/// it exists, and files from `ui.ignore.*` config. This set of files is

105

/// it exists, and files from `ui.ignore.*` config. This set of files is

106

/// then sorted by the string representation of their path.

106

/// then sorted by the string representation of their path.

107

/// * The "expanded contents" of an ignore files is the byte string made

107

/// * The "expanded contents" of an ignore files is the byte string made

108

/// by concatenating its contents with the "expanded contents" of other

108

/// by concatenating its contents with the "expanded contents" of other

109

/// files included with `include:` or `subinclude:` files, in inclusion

109

/// files included with `include:` or `subinclude:` files, in inclusion

110

/// order. This definition is recursive, as included files can

110

/// order. This definition is recursive, as included files can

111

/// themselves include more files.

111

/// themselves include more files.

112

///

112

///

113

/// This hash is defined as the SHA-1 of the concatenation (in sorted

113

/// This hash is defined as the SHA-1 of the concatenation (in sorted

114

/// order) of the "expanded contents" of each "root" ignore file.

114

/// order) of the "expanded contents" of each "root" ignore file.

115

/// (Note that computing this does not require actually concatenating byte

115

/// (Note that computing this does not require actually concatenating byte

116

/// strings into contiguous memory, instead SHA-1 hashing can be done

116

/// strings into contiguous memory, instead SHA-1 hashing can be done

117

/// incrementally.)

117

/// incrementally.)

118

ignore_patterns_hash: IgnorePatternsHash,

118

ignore_patterns_hash: IgnorePatternsHash,

119

}

119

}

120

121

#[derive(BytesCast)]

121

#[derive(BytesCast)]

122

#[repr(C)]

122

#[repr(C)]

123

pub(super) struct Node {

123

pub(super) struct Node {

124

full_path: PathSlice,

124

full_path: PathSlice,

125

126

/// In bytes from `self.full_path.start`

126

/// In bytes from `self.full_path.start`

127

base_name_start: PathSize,

127

base_name_start: PathSize,

128

129

copy_source: OptPathSlice,

129

copy_source: OptPathSlice,

130

children: ChildNodes,

130

children: ChildNodes,

131

pub(super) descendants_with_entry_count: Size,

131

pub(super) descendants_with_entry_count: Size,

132

pub(super) tracked_descendants_count: Size,

132

pub(super) tracked_descendants_count: Size,

133

134

/// Depending on the bits in `flags`:

134

/// Depending on the bits in `flags`:

135

///

135

///

136

/// * If any of `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` are set, the

136

/// * If any of `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` are set, the

137

/// node has an entry.

137

/// node has an entry.

138

///

138

///

139

/// - If `HAS_MODE_AND_SIZE` is set, `data.mode` and `data.size` are

139

/// - If `HAS_MODE_AND_SIZE` is set, `data.mode` and `data.size` are

140

/// meaningful. Otherwise they are set to zero

140

/// meaningful. Otherwise they are set to zero

141

/// - If `HAS_MTIME` is set, `data.mtime` is meaningful. Otherwise it is

141

/// - If `HAS_MTIME` is set, `data.mtime` is meaningful. Otherwise it is

142

/// set to zero.

142

/// set to zero.

143

///

143

///

144

/// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO`, or `HAS_MTIME`

144

/// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO`, or `HAS_MTIME`

145

/// are set, the node does not have an entry and `data` is set to all

145

/// are set, the node does not have an entry and `data` is set to all

146

/// zeros.

146

/// zeros.

147

///

147

///

148

/// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO` are set, but

148

/// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO` are set, but

149

/// `HAS_MTIME` is set, the bytes of `data` should instead be

149

/// `HAS_MTIME` is set, the bytes of `data` should instead be

150

/// interpreted as the `Timestamp` for the mtime of a cached directory.

150

/// interpreted as the `Timestamp` for the mtime of a cached directory.

151

///

151

///

152

/// The presence of this combination of flags means that at some point,

152

/// The presence of this combination of flags means that at some point,

153

/// this path in the working directory was observed:

153

/// this path in the working directory was observed:

154

///

154

///

155

/// - To be a directory

155

/// - To be a directory

156

/// - With the modification time as given by `Timestamp`

156

/// - With the modification time as given by `Timestamp`

157

/// - That timestamp was already strictly in the past when observed,

157

/// - That timestamp was already strictly in the past when observed,

158

/// meaning that later changes cannot happen in the same clock tick

158

/// meaning that later changes cannot happen in the same clock tick

159

/// and must cause a different modification time (unless the system

159

/// and must cause a different modification time (unless the system

160

/// clock jumps back and we get unlucky, which is not impossible but

160

/// clock jumps back and we get unlucky, which is not impossible but

161

/// but deemed unlikely enough).

161

/// but deemed unlikely enough).

162

/// - All direct children of this directory (as returned by

162

/// - All direct children of this directory (as returned by

163

/// `std::fs::read_dir`) either have a corresponding dirstate node, or

163

/// `std::fs::read_dir`) either have a corresponding dirstate node, or

164

/// are ignored by ignore patterns whose hash is in

164

/// are ignored by ignore patterns whose hash is in

165

/// `TreeMetadata::ignore_patterns_hash`.

165

/// `TreeMetadata::ignore_patterns_hash`.

166

///

166

///

167

/// This means that if `std::fs::symlink_metadata` later reports the

167

/// This means that if `std::fs::symlink_metadata` later reports the

168

/// same modification time and ignored patterns haven’t changed, a run

168

/// same modification time and ignored patterns haven’t changed, a run

169

/// of status that is not listing ignored files can skip calling

169

/// of status that is not listing ignored files can skip calling

170

/// `std::fs::read_dir` again for this directory, iterate child

170

/// `std::fs::read_dir` again for this directory, iterate child

171

/// dirstate nodes instead.

171

/// dirstate nodes instead.

172

flags: Flags,

172

flags: Flags,

173

data: Entry,

173

data: Entry,

174

}

174

}

175

176

bitflags! {
    /// Bit flags stored in `Node::flags`, one byte on disk.
    ///
    /// The documentation of `Node::flags` describes how combinations of
    /// these bits determine the interpretation of `Node::data`.
    #[derive(BytesCast)]
    #[repr(C)]
    struct Flags: u8 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const HAS_MODE_AND_SIZE = 1 << 3;
        const HAS_MTIME = 1 << 4;
    }
}

187

188

#[derive(BytesCast, Copy, Clone, Debug)]

188

#[derive(BytesCast, Copy, Clone, Debug)]

189

#[repr(C)]

189

#[repr(C)]

190

struct Entry {

190

struct Entry {

191

mode: I32Be,

191

mode: I32Be,

192

mtime: I32Be,

192

mtime: I32Be,

193

size: I32Be,

193

size: I32Be,

194

}

194

}

195

196

/// Duration since the Unix epoch

196

/// Duration since the Unix epoch

197

#[derive(BytesCast, Copy, Clone, PartialEq)]

197

#[derive(BytesCast, Copy, Clone, PartialEq)]

198

#[repr(C)]

198

#[repr(C)]

199

pub(super) struct Timestamp {

199

pub(super) struct Timestamp {

200

seconds: I64Be,

200

seconds: I64Be,

201

202

/// In `0 .. 1_000_000_000`.

202

/// In `0 .. 1_000_000_000`.

203

///

203

///

204

/// This timestamp is later or earlier than `(seconds, 0)` by this many

204

/// This timestamp is later or earlier than `(seconds, 0)` by this many

205

/// nanoseconds, if `seconds` is non-negative or negative, respectively.

205

/// nanoseconds, if `seconds` is non-negative or negative, respectively.

206

nanoseconds: U32Be,

206

nanoseconds: U32Be,

207

}

207

}

208

209

/// Counted in bytes from the start of the file

209

/// Counted in bytes from the start of the file

210

///

210

///

211

/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.

211

/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.

212

type Offset = U32Be;

212

type Offset = U32Be;

213

214

/// Counted in number of items

214

/// Counted in number of items

215

///

215

///

216

/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.

216

/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.

217

type Size = U32Be;

217

type Size = U32Be;

218

219

/// Counted in bytes

219

/// Counted in bytes

220

///

220

///

221

/// NOTE: we choose not to support file names/paths longer than 64 KiB.

221

/// NOTE: we choose not to support file names/paths longer than 64 KiB.

222

type PathSize = U16Be;

222

type PathSize = U16Be;

223

224

/// A contiguous sequence of `len` times `Node`, representing the child nodes

224

/// A contiguous sequence of `len` times `Node`, representing the child nodes

225

/// of either some other node or of the repository root.

225

/// of either some other node or of the repository root.

226

///

226

///

227

/// Always sorted by ascending `full_path`, to allow binary search.

227

/// Always sorted by ascending `full_path`, to allow binary search.

228

/// Since nodes with the same parent nodes also have the same parent path,

228

/// Since nodes with the same parent nodes also have the same parent path,

229

/// only the `base_name`s need to be compared during binary search.

229

/// only the `base_name`s need to be compared during binary search.

230

#[derive(BytesCast, Copy, Clone)]

230

#[derive(BytesCast, Copy, Clone)]

231

#[repr(C)]

231

#[repr(C)]

232

struct ChildNodes {

232

struct ChildNodes {

233

start: Offset,

233

start: Offset,

234

len: Size,

234

len: Size,

235

}

235

}

236

237

/// A `HgPath` of `len` bytes

237

/// A `HgPath` of `len` bytes

238

#[derive(BytesCast, Copy, Clone)]

238

#[derive(BytesCast, Copy, Clone)]

239

#[repr(C)]

239

#[repr(C)]

240

struct PathSlice {

240

struct PathSlice {

241

start: Offset,

241

start: Offset,

242

len: PathSize,

242

len: PathSize,

243

}

243

}

244

245

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes

245

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes

246

type OptPathSlice = PathSlice;

246

type OptPathSlice = PathSlice;

247

248

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

253

254

impl From<DirstateV2ParseError> for HgError {

254

impl From<DirstateV2ParseError> for HgError {

255

fn from(_: DirstateV2ParseError) -> Self {

255

fn from(_: DirstateV2ParseError) -> Self {

256

HgError::corrupted("dirstate-v2 parse error")

256

HgError::corrupted("dirstate-v2 parse error")

257

}

257

}

258

}

258

}

259

260

impl From<DirstateV2ParseError> for crate::DirstateError {

260

impl From<DirstateV2ParseError> for crate::DirstateError {

261

fn from(error: DirstateV2ParseError) -> Self {

261

fn from(error: DirstateV2ParseError) -> Self {

262

HgError::from(error).into()

262

HgError::from(error).into()

263

}

263

}

264

}

264

}

265

266

impl<'on_disk> Docket<'on_disk> {

266

impl<'on_disk> Docket<'on_disk> {

267

pub fn parents(&self) -> DirstateParents {

267

pub fn parents(&self) -> DirstateParents {

268

use crate::Node;

268

use crate::Node;

269

let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])

269

let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])

270

.unwrap()

270

.unwrap()

271

.clone();

271

.clone();

272

let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])

272

let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])

273

.unwrap()

273

.unwrap()

274

.clone();

274

.clone();

275

DirstateParents { p1, p2 }

275

DirstateParents { p1, p2 }

276

}

276

}

277

278

pub fn tree_metadata(&self) -> &[u8] {

278

pub fn tree_metadata(&self) -> &[u8] {

279

self.header.metadata.as_bytes()

279

self.header.metadata.as_bytes()

280

}

280

}

281

282

pub fn data_size(&self) -> usize {

282

pub fn data_size(&self) -> usize {

283

// This `unwrap` could only panic on a 16-bit CPU

283

// This `unwrap` could only panic on a 16-bit CPU

284

self.header.data_size.get().try_into().unwrap()

284

self.header.data_size.get().try_into().unwrap()

285

}

285

}

286

287

pub fn data_filename(&self) -> String {

287

pub fn data_filename(&self) -> String {

288

String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()

288

String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()

289

}

289

}

290

}

290

}

291

292

pub fn read_docket(

292

pub fn read_docket(

293

on_disk: &[u8],

293

on_disk: &[u8],

294

) -> Result<Docket<'_>, DirstateV2ParseError> {

294

) -> Result<Docket<'_>, DirstateV2ParseError> {

295

let (header, uuid) =

295

let (header, uuid) =

296

DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;

296

DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;

297

let uuid_size = header.uuid_size as usize;

297

let uuid_size = header.uuid_size as usize;

298

if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {

298

if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {

299

Ok(Docket { header, uuid })

299

Ok(Docket { header, uuid })

300

} else {

300

} else {

301

Err(DirstateV2ParseError)

301

Err(DirstateV2ParseError)

302

}

302

}

303

}

303

}

304

305

pub(super) fn read<'on_disk>(

305

pub(super) fn read<'on_disk>(

306

on_disk: &'on_disk [u8],

306

on_disk: &'on_disk [u8],

307

metadata: &[u8],

307

metadata: &[u8],

308

) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {

308

) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {

309

if on_disk.is_empty() {

309

if on_disk.is_empty() {

310

return Ok(DirstateMap::empty(on_disk));

310

return Ok(DirstateMap::empty(on_disk));

311

}

311

}

312

let (meta, _) = TreeMetadata::from_bytes(metadata)

312

let (meta, _) = TreeMetadata::from_bytes(metadata)

313

.map_err(|_| DirstateV2ParseError)?;

313

.map_err(|_| DirstateV2ParseError)?;

314

let dirstate_map = DirstateMap {

314

let dirstate_map = DirstateMap {

315

on_disk,

315

on_disk,

316

root: dirstate_map::ChildNodes::OnDisk(read_nodes(

316

root: dirstate_map::ChildNodes::OnDisk(read_nodes(

317

on_disk,

317

on_disk,

318

meta.root_nodes,

318

meta.root_nodes,

319

)?),

319

)?),

320

nodes_with_entry_count: meta.nodes_with_entry_count.get(),

320

nodes_with_entry_count: meta.nodes_with_entry_count.get(),

321

nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),

321

nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),

322

ignore_patterns_hash: meta.ignore_patterns_hash,

322

ignore_patterns_hash: meta.ignore_patterns_hash,

323

unreachable_bytes: meta.unreachable_bytes.get(),

323

unreachable_bytes: meta.unreachable_bytes.get(),

324

};

324

};

325

Ok(dirstate_map)

325

Ok(dirstate_map)

326

}

326

}

327

328

impl Node {

328

impl Node {

329

pub(super) fn full_path<'on_disk>(

329

pub(super) fn full_path<'on_disk>(

330

&self,

330

&self,

331

on_disk: &'on_disk [u8],

331

on_disk: &'on_disk [u8],

332

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

332

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

333

read_hg_path(on_disk, self.full_path)

333

read_hg_path(on_disk, self.full_path)

334

}

334

}

335

336

pub(super) fn base_name_start<'on_disk>(

336

pub(super) fn base_name_start<'on_disk>(

337

&self,

337

&self,

338

) -> Result<usize, DirstateV2ParseError> {

338

) -> Result<usize, DirstateV2ParseError> {

339

let start = self.base_name_start.get();

339

let start = self.base_name_start.get();

340

if start < self.full_path.len.get() {

340

if start < self.full_path.len.get() {

341

let start = usize::try_from(start)

341

let start = usize::try_from(start)

342

// u32 -> usize, could only panic on a 16-bit CPU

342

// u32 -> usize, could only panic on a 16-bit CPU

343

.expect("dirstate-v2 base_name_start out of bounds");

343

.expect("dirstate-v2 base_name_start out of bounds");

344

Ok(start)

344

Ok(start)

345

} else {

345

} else {

346

Err(DirstateV2ParseError)

346

Err(DirstateV2ParseError)

347

}

347

}

348

}

348

}

349

350

pub(super) fn base_name<'on_disk>(

350

pub(super) fn base_name<'on_disk>(

351

&self,

351

&self,

352

on_disk: &'on_disk [u8],

352

on_disk: &'on_disk [u8],

353

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

353

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

354

let full_path = self.full_path(on_disk)?;

354

let full_path = self.full_path(on_disk)?;

355

let base_name_start = self.base_name_start()?;

355

let base_name_start = self.base_name_start()?;

356

Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))

356

Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))

357

}

357

}

358

359

pub(super) fn path<'on_disk>(

359

pub(super) fn path<'on_disk>(

360

&self,

360

&self,

361

on_disk: &'on_disk [u8],

361

on_disk: &'on_disk [u8],

362

) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {

362

) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {

363

Ok(WithBasename::from_raw_parts(

363

Ok(WithBasename::from_raw_parts(

364

Cow::Borrowed(self.full_path(on_disk)?),

364

Cow::Borrowed(self.full_path(on_disk)?),

365

self.base_name_start()?,

365

self.base_name_start()?,

366

))

366

))

367

}

367

}

368

369

pub(super) fn has_copy_source<'on_disk>(&self) -> bool {

369

pub(super) fn has_copy_source<'on_disk>(&self) -> bool {

370

self.copy_source.start.get() != 0

370

self.copy_source.start.get() != 0

371

}

371

}

372

373

pub(super) fn copy_source<'on_disk>(

373

pub(super) fn copy_source<'on_disk>(

374

&self,

374

&self,

375

on_disk: &'on_disk [u8],

375

on_disk: &'on_disk [u8],

376

) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {

376

) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {

377

Ok(if self.has_copy_source() {

377

Ok(if self.has_copy_source() {

378

Some(read_hg_path(on_disk, self.copy_source)?)

378

Some(read_hg_path(on_disk, self.copy_source)?)

379

} else {

379

} else {

380

None

380

None

381

})

381

})

382

}

382

}

383

384

fn has_entry(&self) -> bool {

384

fn has_entry(&self) -> bool {

385

self.flags.intersects(

385

self.flags.intersects(

386

Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,

386

Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,

387

)

387

)

388

}

388

}

389

390

pub(super) fn node_data(

390

pub(super) fn node_data(

391

&self,

391

&self,

392

) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {

392

) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {

393

if self.has_entry() {

393

if self.has_entry() {

394

Ok(dirstate_map::NodeData::Entry(self.assume_entry()))

394

Ok(dirstate_map::NodeData::Entry(self.assume_entry()))

395

} else if let Some(&mtime) = self.cached_directory_mtime() {

395

} else if let Some(&mtime) = self.cached_directory_mtime() {

396

Ok(dirstate_map::NodeData::CachedDirectory { mtime })

396

Ok(dirstate_map::NodeData::CachedDirectory { mtime })

397

} else {

397

} else {

398

Ok(dirstate_map::NodeData::None)

398

Ok(dirstate_map::NodeData::None)

399

}

399

}

400

}

400

}

401

402

pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {

402

pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {

403

if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {

403

if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {

404

Some(self.data.as_timestamp())

404

Some(self.data.as_timestamp())

405

} else {

405

} else {

406

None

406

None

407

}

407

}

408

}

408

}

409

410

fn assume_entry(&self) -> DirstateEntry {

410

fn assume_entry(&self) -> DirstateEntry {

411

// TODO: convert through raw bits instead?

411

// TODO: convert through raw bits instead?

412

let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);

412

let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);

413

let p1_tracked = self.flags.contains(Flags::P1_TRACKED);

413

let p1_tracked = self.flags.contains(Flags::P1_TRACKED);

414

let p2_info = self.flags.contains(Flags::P2_INFO);

414

let p2_info = self.flags.contains(Flags::P2_INFO);

415

let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {

415

let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {

416

Some((self.data.mode.into(), self.data.size.into()))

416

Some((self.data.mode.into(), self.data.size.into()))

417

} else {

417

} else {

418

None

418

None

419

};

419

};

420

let mtime = if self.flags.contains(Flags::HAS_MTIME) {

420

let mtime = if self.flags.contains(Flags::HAS_MTIME) {

421

Some(self.data.mtime.into())

421

Some(self.data.mtime.into())

422

} else {

422

} else {

423

None

423

None

424

};

424

};

425

DirstateEntry::from_v2_data(

425

DirstateEntry::from_v2_data(

426

wdir_tracked,

426

wdir_tracked,

427

p1_tracked,

427

p1_tracked,

428

p2_info,

428

p2_info,

429

mode_size,

429

mode_size,

430

mtime,

430

mtime,

431

)

431

)

432

}

432

}

433

434

pub(super) fn entry(

434

pub(super) fn entry(

435

&self,

435

&self,

436

) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {

436

) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {

437

if self.has_entry() {

437

if self.has_entry() {

438

Ok(Some(self.assume_entry()))

438

Ok(Some(self.assume_entry()))

439

} else {

439

} else {

440

Ok(None)

440

Ok(None)

441

}

441

}

442

}

442

}

443

444

pub(super) fn children<'on_disk>(

444

pub(super) fn children<'on_disk>(

445

&self,

445

&self,

446

on_disk: &'on_disk [u8],

446

on_disk: &'on_disk [u8],

447

) -> Result<&'on_disk [Node], DirstateV2ParseError> {

447

) -> Result<&'on_disk [Node], DirstateV2ParseError> {

448

read_nodes(on_disk, self.children)

448

read_nodes(on_disk, self.children)

449

}

449

}

450

451

pub(super) fn to_in_memory_node<'on_disk>(

451

pub(super) fn to_in_memory_node<'on_disk>(

452

&self,

452

&self,

453

on_disk: &'on_disk [u8],

453

on_disk: &'on_disk [u8],

454

) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {

454

) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {

455

Ok(dirstate_map::Node {

455

Ok(dirstate_map::Node {

456

children: dirstate_map::ChildNodes::OnDisk(

456

children: dirstate_map::ChildNodes::OnDisk(

457

self.children(on_disk)?,

457

self.children(on_disk)?,

458

),

458

),

459

copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),

459

copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),

460

data: self.node_data()?,

460

data: self.node_data()?,

461

descendants_with_entry_count: self

461

descendants_with_entry_count: self

462

.descendants_with_entry_count

462

.descendants_with_entry_count

463

.get(),

463

.get(),

464

tracked_descendants_count: self.tracked_descendants_count.get(),

464

tracked_descendants_count: self.tracked_descendants_count.get(),

465

})

465

})

466

}

466

}

467

}

467

}

468

469

impl Entry {

469

impl Entry {

470

fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {

470

fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {

471

let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =

471

let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =

472

entry.v2_data();

472

entry.v2_data();

473

// TODO: convert throug raw flag bits instead?

473

// TODO: convert throug raw flag bits instead?

474

let mut flags = Flags::empty();

474

let mut flags = Flags::empty();

475

flags.set(Flags::WDIR_TRACKED, wdir_tracked);

475

flags.set(Flags::WDIR_TRACKED, wdir_tracked);

476

flags.set(Flags::P1_TRACKED, p1_tracked);

476

flags.set(Flags::P1_TRACKED, p1_tracked);

477

flags.set(Flags::P2_INFO, p2_info);

477

flags.set(Flags::P2_INFO, p2_info);

478

let (mode, size, mtime);

478

let (mode, size, mtime);

479

if let Some((m, s)) = mode_size_opt {

479

if let Some((m, s)) = mode_size_opt {

480

mode = m;

480

mode = m;

481

size = s;

481

size = s;

482

flags.insert(Flags::HAS_MODE_AND_SIZE)

482

flags.insert(Flags::HAS_MODE_AND_SIZE)

483

} else {

483

} else {

484

mode = 0;

484

mode = 0;

485

size = 0;

485

size = 0;

486

}

486

}

487

if let Some(m) = mtime_opt {

487

if let Some(m) = mtime_opt {

488

mtime = m;

488

mtime = m;

489

flags.insert(Flags::HAS_MTIME);

489

flags.insert(Flags::HAS_MTIME);

490

} else {

490

} else {

491

mtime = 0;

491

mtime = 0;

492

}

492

}

493

let raw_entry = Entry {

493

let raw_entry = Entry {

494

mode: mode.into(),

494

mode: mode.into(),

495

size: size.into(),

495

size: size.into(),

496

mtime: mtime.into(),

496

mtime: mtime.into(),

497

};

497

};

498

(flags, raw_entry)

498

(flags, raw_entry)

499

}

499

}

500

501

fn from_timestamp(timestamp: Timestamp) -> Self {

501

fn from_timestamp(timestamp: Timestamp) -> Self {

502

// Safety: both types implement the `ByteCast` trait, so we could

502

// Safety: both types implement the `ByteCast` trait, so we could

503

// safely use `as_bytes` and `from_bytes` to do this conversion. Using

503

// safely use `as_bytes` and `from_bytes` to do this conversion. Using

504

// `transmute` instead makes the compiler check that the two types

504

// `transmute` instead makes the compiler check that the two types

505

// have the same size, which eliminates the error case of

505

// have the same size, which eliminates the error case of

506

// `from_bytes`.

506

// `from_bytes`.

507

unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }

507

unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }

508

}

508

}

509

510

fn as_timestamp(&self) -> &Timestamp {

510

fn as_timestamp(&self) -> &Timestamp {

511

// Safety: same as above in `from_timestamp`

511

// Safety: same as above in `from_timestamp`

512

unsafe { &*(self as *const Entry as *const Timestamp) }

512

unsafe { &*(self as *const Entry as *const Timestamp) }

513

}

513

}

514

}

514

}

515

516

impl Timestamp {

516

impl Timestamp {

517

pub fn seconds(&self) -> i64 {

517

pub fn seconds(&self) -> i64 {

518

self.seconds.get()

518

self.seconds.get()

519

}

519

}

520

}

520

}

521

522

impl From<SystemTime> for Timestamp {

522

impl From<SystemTime> for Timestamp {

523

fn from(system_time: SystemTime) -> Self {

523

fn from(system_time: SystemTime) -> Self {

524

let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {

524

let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {

525

Ok(duration) => {

525

Ok(duration) => {

526

(duration.as_secs() as i64, duration.subsec_nanos())

526

(duration.as_secs() as i64, duration.subsec_nanos())

527

}

527

}

528

Err(error) => {

528

Err(error) => {

529

let negative = error.duration();

529

let negative = error.duration();

530

(-(negative.as_secs() as i64), negative.subsec_nanos())

530

(-(negative.as_secs() as i64), negative.subsec_nanos())

531

}

531

}

532

};

532

};

533

Timestamp {

533

Timestamp {

534

seconds: secs.into(),

534

seconds: secs.into(),

535

nanoseconds: nanos.into(),

535

nanoseconds: nanos.into(),

536

}

536

}

537

}

537

}

538

}

538

}

539

540

impl From<&'_ Timestamp> for SystemTime {

540

impl From<&'_ Timestamp> for SystemTime {

541

fn from(timestamp: &'_ Timestamp) -> Self {

541

fn from(timestamp: &'_ Timestamp) -> Self {

542

let secs = timestamp.seconds.get();

542

let secs = timestamp.seconds.get();

543

let nanos = timestamp.nanoseconds.get();

543

let nanos = timestamp.nanoseconds.get();

544

if secs >= 0 {

544

if secs >= 0 {

545

UNIX_EPOCH + Duration::new(secs as u64, nanos)

545

UNIX_EPOCH + Duration::new(secs as u64, nanos)

546

} else {

546

} else {

547

UNIX_EPOCH - Duration::new((-secs) as u64, nanos)

547

UNIX_EPOCH - Duration::new((-secs) as u64, nanos)

548

}

548

}

549

}

549

}

550

}

550

}

551

552

fn read_hg_path(

552

fn read_hg_path(

553

on_disk: &[u8],

553

on_disk: &[u8],

554

slice: PathSlice,

554

slice: PathSlice,

555

) -> Result<&HgPath, DirstateV2ParseError> {

555

) -> Result<&HgPath, DirstateV2ParseError> {

556

read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)

556

read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)

557

}

557

}

558

559

fn read_nodes(

559

fn read_nodes(

560

on_disk: &[u8],

560

on_disk: &[u8],

561

slice: ChildNodes,

561

slice: ChildNodes,

562

) -> Result<&[Node], DirstateV2ParseError> {

562

) -> Result<&[Node], DirstateV2ParseError> {

563

read_slice(on_disk, slice.start, slice.len.get())

563

read_slice(on_disk, slice.start, slice.len.get())

564

}

564

}

565

566

fn read_slice<T, Len>(

566

fn read_slice<T, Len>(

567

on_disk: &[u8],

567

on_disk: &[u8],

568

start: Offset,

568

start: Offset,

569

len: Len,

569

len: Len,

570

) -> Result<&[T], DirstateV2ParseError>

570

) -> Result<&[T], DirstateV2ParseError>

571

where

571

where

572

T: BytesCast,

572

T: BytesCast,

573

Len: TryInto<usize>,

573

Len: TryInto<usize>,

574

{

574

{

575

// Either `usize::MAX` would result in "out of bounds" error since a single

575

// Either `usize::MAX` would result in "out of bounds" error since a single

576

// `&[u8]` cannot occupy the entire addess space.

576

// `&[u8]` cannot occupy the entire addess space.

577

let start = start.get().try_into().unwrap_or(std::usize::MAX);

577

let start = start.get().try_into().unwrap_or(std::usize::MAX);

578

let len = len.try_into().unwrap_or(std::usize::MAX);

578

let len = len.try_into().unwrap_or(std::usize::MAX);

579

on_disk

579

on_disk

580

.get(start..)

580

.get(start..)

581

.and_then(|bytes| T::slice_from_bytes(bytes, len).ok())

581

.and_then(|bytes| T::slice_from_bytes(bytes, len).ok())

582

.map(|(slice, _rest)| slice)

582

.map(|(slice, _rest)| slice)

583

.ok_or_else(|| DirstateV2ParseError)

583

.ok_or_else(|| DirstateV2ParseError)

584

}

584

}

585

586

pub(crate) fn for_each_tracked_path<'on_disk>(

586

pub(crate) fn for_each_tracked_path<'on_disk>(

587

on_disk: &'on_disk [u8],

587

on_disk: &'on_disk [u8],

588

metadata: &[u8],

588

metadata: &[u8],

589

mut f: impl FnMut(&'on_disk HgPath),

589

mut f: impl FnMut(&'on_disk HgPath),

590

) -> Result<(), DirstateV2ParseError> {

590

) -> Result<(), DirstateV2ParseError> {

591

let (meta, _) = TreeMetadata::from_bytes(metadata)

591

let (meta, _) = TreeMetadata::from_bytes(metadata)

592

.map_err(|_| DirstateV2ParseError)?;

592

.map_err(|_| DirstateV2ParseError)?;

593

fn recur<'on_disk>(

593

fn recur<'on_disk>(

594

on_disk: &'on_disk [u8],

594

on_disk: &'on_disk [u8],

595

nodes: ChildNodes,

595

nodes: ChildNodes,

596

f: &mut impl FnMut(&'on_disk HgPath),

596

f: &mut impl FnMut(&'on_disk HgPath),

597

) -> Result<(), DirstateV2ParseError> {

597

) -> Result<(), DirstateV2ParseError> {

598

for node in read_nodes(on_disk, nodes)? {

598

for node in read_nodes(on_disk, nodes)? {

599

if let Some(entry) = node.entry()? {

599

if let Some(entry) = node.entry()? {

600

if entry.state().is_tracked() {

600

if entry.state().is_tracked() {

601

f(node.full_path(on_disk)?)

601

f(node.full_path(on_disk)?)

602

}

602

}

603

}

603

}

604

recur(on_disk, node.children, f)?

604

recur(on_disk, node.children, f)?

605

}

605

}

606

Ok(())

606

Ok(())

607

}

607

}

608

recur(on_disk, meta.root_nodes, &mut f)

608

recur(on_disk, meta.root_nodes, &mut f)

609

}

609

}

610

611

/// Returns new data and metadata, together with whether that data should be

611

/// Returns new data and metadata, together with whether that data should be

612

/// appended to the existing data file whose content is at

612

/// appended to the existing data file whose content is at

613

/// `dirstate_map.on_disk` (true), instead of written to a new data file

613

/// `dirstate_map.on_disk` (true), instead of written to a new data file

614

/// (false).

614

/// (false).

615

pub(super) fn write(

615

pub(super) fn write(

616

dirstate_map: &mut DirstateMap,

616

dirstate_map: &mut DirstateMap,

617

can_append: bool,

617

can_append: bool,

618

) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {

618

) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {

619

let append = can_append && dirstate_map.write_should_append();

619

let append = can_append && dirstate_map.write_should_append();

620

621

// This ignores the space for paths, and for nodes without an entry.

621

// This ignores the space for paths, and for nodes without an entry.

622

// TODO: better estimate? Skip the `Vec` and write to a file directly?

622

// TODO: better estimate? Skip the `Vec` and write to a file directly?

623

let size_guess = std::mem::size_of::<Node>()

623

let size_guess = std::mem::size_of::<Node>()

624

* dirstate_map.nodes_with_entry_count as usize;

624

* dirstate_map.nodes_with_entry_count as usize;

625

626

let mut writer = Writer {

626

let mut writer = Writer {

627

dirstate_map,

627

dirstate_map,

628

append,

628

append,

629

out: Vec::with_capacity(size_guess),

629

out: Vec::with_capacity(size_guess),

630

};

630

};

631

632

let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

632

let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

633

634

let meta = TreeMetadata {

634

let meta = TreeMetadata {

635

root_nodes,

635

root_nodes,

636

nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),

636

nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),

637

nodes_with_copy_source_count: dirstate_map

637

nodes_with_copy_source_count: dirstate_map

638

.nodes_with_copy_source_count

638

.nodes_with_copy_source_count

639

.into(),

639

.into(),

640

unreachable_bytes: dirstate_map.unreachable_bytes.into(),

640

unreachable_bytes: dirstate_map.unreachable_bytes.into(),

641

unused: [0; 4],

641

unused: [0; 4],

642

ignore_patterns_hash: dirstate_map.ignore_patterns_hash,

642

ignore_patterns_hash: dirstate_map.ignore_patterns_hash,

643

};

643

};

644

Ok((writer.out, meta.as_bytes().to_vec(), append))

644

Ok((writer.out, meta.as_bytes().to_vec(), append))

645

}

645

}

646

647

struct Writer<'dmap, 'on_disk> {

647

struct Writer<'dmap, 'on_disk> {

648

dirstate_map: &'dmap DirstateMap<'on_disk>,

648

dirstate_map: &'dmap DirstateMap<'on_disk>,

649

append: bool,

649

append: bool,

650

out: Vec<u8>,

650

out: Vec<u8>,

651

}

651

}

652

653

impl Writer<'_, '_> {

653

impl Writer<'_, '_> {

654

fn write_nodes(

654

fn write_nodes(

655

&mut self,

655

&mut self,

656

nodes: dirstate_map::ChildNodesRef,

656

nodes: dirstate_map::ChildNodesRef,

657

) -> Result<ChildNodes, DirstateError> {

657

) -> Result<ChildNodes, DirstateError> {

658

// Reuse already-written nodes if possible

658

// Reuse already-written nodes if possible

659

if self.append {

659

if self.append {

660

if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {

660

if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {

661

let start = self.on_disk_offset_of(nodes_slice).expect(

661

let start = self.on_disk_offset_of(nodes_slice).expect(

662

"dirstate-v2 OnDisk nodes not found within on_disk",

662

"dirstate-v2 OnDisk nodes not found within on_disk",

663

);

663

);

664

let len = child_nodes_len_from_usize(nodes_slice.len());

664

let len = child_nodes_len_from_usize(nodes_slice.len());

665

return Ok(ChildNodes { start, len });

665

return Ok(ChildNodes { start, len });

666

}

666

}

667

}

667

}

668

669

// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has

669

// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has

670

// undefined iteration order. Sort to enable binary search in the

670

// undefined iteration order. Sort to enable binary search in the

671

// written file.

671

// written file.

672

let nodes = nodes.sorted();

672

let nodes = nodes.sorted();

673

let nodes_len = nodes.len();

673

let nodes_len = nodes.len();

674

675

// First accumulate serialized nodes in a `Vec`

675

// First accumulate serialized nodes in a `Vec`

676

let mut on_disk_nodes = Vec::with_capacity(nodes_len);

676

let mut on_disk_nodes = Vec::with_capacity(nodes_len);

677

for node in nodes {

677

for node in nodes {

678

let children =

678

let children =

679

self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;

679

self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;

680

let full_path = node.full_path(self.dirstate_map.on_disk)?;

680

let full_path = node.full_path(self.dirstate_map.on_disk)?;

681

let full_path = self.write_path(full_path.as_bytes());

681

let full_path = self.write_path(full_path.as_bytes());

682

let copy_source = if let Some(source) =

682

let copy_source = if let Some(source) =

683

node.copy_source(self.dirstate_map.on_disk)?

683

node.copy_source(self.dirstate_map.on_disk)?

684

{

684

{

685

self.write_path(source.as_bytes())

685

self.write_path(source.as_bytes())

686

} else {

686

} else {

687

PathSlice {

687

PathSlice {

688

start: 0.into(),

688

start: 0.into(),

689

len: 0.into(),

689

len: 0.into(),

690

}

690

}

691

};

691

};

692

on_disk_nodes.push(match node {

692

on_disk_nodes.push(match node {

693

NodeRef::InMemory(path, node) => {

693

NodeRef::InMemory(path, node) => {

694

let (flags, data) = match &node.data {

694

let (flags, data) = match &node.data {

695

dirstate_map::NodeData::Entry(entry) => {

695

dirstate_map::NodeData::Entry(entry) => {

696

Entry::from_dirstate_entry(entry)

696

Entry::from_dirstate_entry(entry)

697

}

697

}

698

dirstate_map::NodeData::CachedDirectory { mtime } => {

698

dirstate_map::NodeData::CachedDirectory { mtime } => {

699

(Flags::HAS_MTIME, Entry::from_timestamp(*mtime))

699

(Flags::HAS_MTIME, Entry::from_timestamp(*mtime))

700

}

700

}

701

dirstate_map::NodeData::None => (

701

dirstate_map::NodeData::None => (

702

Flags::empty(),

702

Flags::empty(),

703

Entry {

703

Entry {

704

mode: 0.into(),

704

mode: 0.into(),

705

size: 0.into(),

705

size: 0.into(),

706

mtime: 0.into(),

706

mtime: 0.into(),

707

},

707

},

708

),

708

),

709

};

709

};

710

Node {

710

Node {

711

children,

711

children,

712

copy_source,

712

copy_source,

713

full_path,

713

full_path,

714

base_name_start: u16::try_from(path.base_name_start())

714

base_name_start: u16::try_from(path.base_name_start())

715

// Could only panic for paths over 64 KiB

715

// Could only panic for paths over 64 KiB

716

.expect("dirstate-v2 path length overflow")

716

.expect("dirstate-v2 path length overflow")

717

.into(),

717

.into(),

718

descendants_with_entry_count: node

718

descendants_with_entry_count: node

719

.descendants_with_entry_count

719

.descendants_with_entry_count

720

.into(),

720

.into(),

721

tracked_descendants_count: node

721

tracked_descendants_count: node

722

.tracked_descendants_count

722

.tracked_descendants_count

723

.into(),

723

.into(),

724

flags,

724

flags,

725

data,

725

data,

726

}

726

}

727

}

727

}

728

NodeRef::OnDisk(node) => Node {

728

NodeRef::OnDisk(node) => Node {

729

children,

729

children,

730

copy_source,

730

copy_source,

731

full_path,

731

full_path,

732

..*node

732

..*node

733

},

733

},

734

})

734

})

735

}

735

}

736

// … so we can write them contiguously, after writing everything else

736

// … so we can write them contiguously, after writing everything else

737

// they refer to.

737

// they refer to.

738

let start = self.current_offset();

738

let start = self.current_offset();

739

let len = child_nodes_len_from_usize(nodes_len);

739

let len = child_nodes_len_from_usize(nodes_len);

740

self.out.extend(on_disk_nodes.as_bytes());

740

self.out.extend(on_disk_nodes.as_bytes());

741

Ok(ChildNodes { start, len })

741

Ok(ChildNodes { start, len })

742

}

742

}

743

744

/// If the given slice of items is within `on_disk`, returns its offset

744

/// If the given slice of items is within `on_disk`, returns its offset

745

/// from the start of `on_disk`.

745

/// from the start of `on_disk`.

746

fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>

746

fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>

747

where

747

where

748

T: BytesCast,

748

T: BytesCast,

749

{

749

{

750

fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {

750

fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {

751

let start = slice.as_ptr() as usize;

751

let start = slice.as_ptr() as usize;

752

let end = start + slice.len();

752

let end = start + slice.len();

753

start..=end

753

start..=end

754

}

754

}

755

let slice_addresses = address_range(slice.as_bytes());

755

let slice_addresses = address_range(slice.as_bytes());

756

let on_disk_addresses = address_range(self.dirstate_map.on_disk);

756

let on_disk_addresses = address_range(self.dirstate_map.on_disk);

757

if on_disk_addresses.contains(slice_addresses.start())

757

if on_disk_addresses.contains(slice_addresses.start())

758

&& on_disk_addresses.contains(slice_addresses.end())

758

&& on_disk_addresses.contains(slice_addresses.end())

759

{

759

{

760

let offset = slice_addresses.start() - on_disk_addresses.start();

760

let offset = slice_addresses.start() - on_disk_addresses.start();

761

Some(offset_from_usize(offset))

761

Some(offset_from_usize(offset))

762

} else {

762

} else {

763

None

763

None

764

}

764

}

765

}

765

}

766

767

fn current_offset(&mut self) -> Offset {

767

fn current_offset(&mut self) -> Offset {

768

let mut offset = self.out.len();

768

let mut offset = self.out.len();

769

if self.append {

769

if self.append {

770

offset += self.dirstate_map.on_disk.len()

770

offset += self.dirstate_map.on_disk.len()

771

}

771

}

772

offset_from_usize(offset)

772

offset_from_usize(offset)

773

}

773

}

774

775

fn write_path(&mut self, slice: &[u8]) -> PathSlice {

775

fn write_path(&mut self, slice: &[u8]) -> PathSlice {

776

let len = path_len_from_usize(slice.len());

776

let len = path_len_from_usize(slice.len());

777

// Reuse an already-written path if possible

777

// Reuse an already-written path if possible

778

if self.append {

778

if self.append {

779

if let Some(start) = self.on_disk_offset_of(slice) {

779

if let Some(start) = self.on_disk_offset_of(slice) {

780

return PathSlice { start, len };

780

return PathSlice { start, len };

781

}

781

}

782

}

782

}

783

let start = self.current_offset();

783

let start = self.current_offset();

784

self.out.extend(slice.as_bytes());

784

self.out.extend(slice.as_bytes());

785

PathSlice { start, len }

785

PathSlice { start, len }

786

}

786

}

787

}

787

}

788

789

fn offset_from_usize(x: usize) -> Offset {

789

fn offset_from_usize(x: usize) -> Offset {

790

u32::try_from(x)

790

u32::try_from(x)

791

// Could only panic for a dirstate file larger than 4 GiB

791

// Could only panic for a dirstate file larger than 4 GiB

792

.expect("dirstate-v2 offset overflow")

792

.expect("dirstate-v2 offset overflow")

793

.into()

793

.into()

794

}

794

}

795

796

fn child_nodes_len_from_usize(x: usize) -> Size {

796

fn child_nodes_len_from_usize(x: usize) -> Size {

797

u32::try_from(x)

797

u32::try_from(x)

798

// Could only panic with over 4 billion nodes

798

// Could only panic with over 4 billion nodes

799

.expect("dirstate-v2 slice length overflow")

799

.expect("dirstate-v2 slice length overflow")

800

.into()

800

.into()

801

}

801

}

802

803

fn path_len_from_usize(x: usize) -> PathSize {

803

fn path_len_from_usize(x: usize) -> PathSize {

804

u16::try_from(x)

804

u16::try_from(x)

805

// Could only panic for paths over 64 KiB

805

// Could only panic for paths over 64 KiB

806

.expect("dirstate-v2 path length overflow")

806

.expect("dirstate-v2 path length overflow")

807

.into()

807

.into()

808

}

808

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # dirstatedocket.py - docket file for dirstate-v2
             #
             # Copyright Mercurial Contributors
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import struct
             from ..revlogutils import docket as docket_mod
             V2_FORMAT_MARKER = b"dirstate-v2\n"
             # Must match the constant of the same name in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`
             TREE_METADATA_SIZE = 44
             # * 12 bytes: format marker
             # * 32 bytes: node ID of the working directory's first parent
             # * 32 bytes: node ID of the working directory's second parent
+            # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
             # * 4 bytes: big-endian used size of the data file
-            # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
             # * 1 byte: length of the data file's UUID
             # * variable: data file's UUID
             #
             # Node IDs are null-padded if shorter than 32 bytes.
             # A data file shorter than the specified used size is corrupted (truncated)
             HEADER = struct.Struct(
-                ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
+                ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
             )
             class DirstateDocket(object):
                 """In-memory form of the dirstate-v2 docket.

                 Holds the working directory parents, the used size of the data
                 file, the raw tree metadata bytes and the data file's UUID.
                 """

                 # `%`-formatted with the UUID by `data_filename()`
                 data_filename_pattern = b'dirstate.%s'
                 def __init__(self, parents, data_size, tree_metadata, uuid):
                     self.parents = parents
                     self.data_size = data_size
                     self.tree_metadata = tree_metadata
                     self.uuid = uuid
                 @classmethod
                 def with_new_uuid(cls, parents, data_size, tree_metadata):
                     """Build a docket whose UUID comes from `docket_mod.make_uid()`."""
                     return cls(parents, data_size, tree_metadata, docket_mod.make_uid())
                 @classmethod
                 def parse(cls, data, nodeconstants):
                     """Parse the bytes of a docket file.

                     Empty `data` yields a docket with null parents, a data size of
                     zero, empty tree metadata and no UUID.

                     Raises `ValueError` when the format marker does not match.
                     """
                     if not data:
                         parents = (nodeconstants.nullid, nodeconstants.nullid)
                         return cls(parents, 0, b'', None)
-                    marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(data)
+                    marker, p1, p2, meta, data_size, uuid_size = HEADER.unpack_from(data)
                     if marker != V2_FORMAT_MARKER:
                         raise ValueError("expected dirstate-v2 marker")
                     # The UUID directly follows the fixed-size header
                     uuid = data[HEADER.size : HEADER.size + uuid_size]
                     # Stored node IDs are padded to 32 bytes; keep only the used part
                     p1 = p1[: nodeconstants.nodelen]
                     p2 = p2[: nodeconstants.nodelen]
                     return cls((p1, p2), data_size, meta, uuid)
                 def serialize(self):
                     """Return the docket file bytes: packed header, then the UUID."""
                     p1, p2 = self.parents
                     header = HEADER.pack(
                         V2_FORMAT_MARKER,
                         p1,
                         p2,
+                        self.tree_metadata,
                         self.data_size,
-                        self.tree_metadata,
                         len(self.uuid),
                     )
                     return header + self.uuid
                 def data_filename(self):
                     """Return the data file name built from the pattern and the UUID."""
                     return self.data_filename_pattern % self.uuid

             //! The "version 2" disk representation of the dirstate
             //!
             //! # File format
             //!
             //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
             //! with a fixed-sized header whose layout is defined by the `DocketHeader`
             //! struct, followed by the data file identifier.
             //!
             //! A separate `.hg/dirstate.{uuid}` file contains most of the data. That
             //! file may be longer than the size given in the docket, but not shorter. Only
             //! the start of the data file up to the given size is considered. The
             //! fixed-size "root" of the dirstate tree whose layout is defined by the
             //! `Root` struct is found at the end of that slice of data.
             //!
             //! Its `root_nodes` field contains the slice (offset and length) to
             //! the nodes representing the files and directories at the root of the
             //! repository. Each node is also fixed-size, defined by the `Node` struct.
             //! Nodes in turn contain slices to variable-size paths, and to their own child
             //! nodes (if any) for nested files and directories.
             use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
             use crate::dirstate_tree::path_with_basename::WithBasename;
             use crate::errors::HgError;
             use crate::utils::hg_path::HgPath;
             use crate::DirstateEntry;
             use crate::DirstateError;
             use crate::DirstateParents;
             use bitflags::bitflags;
             use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
             use bytes_cast::BytesCast;
             use format_bytes::format_bytes;
             use std::borrow::Cow;
             use std::convert::{TryFrom, TryInto};
             use std::time::{Duration, SystemTime, UNIX_EPOCH};
             /// Added at the start of `.hg/dirstate` when the "v2" format is used.
             /// This is a redundant sanity check more than an actual "magic number" since
             /// `.hg/requires` already governs which format should be used.
             pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
             /// Keep space for 256-bit hashes
             const STORED_NODE_ID_BYTES: usize = 32;
             /// … even though only 160 bits are used for now, with SHA-1
             const USED_NODE_ID_BYTES: usize = 20;
             pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
             pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
             /// Must match the constant of the same name in
             /// `mercurial/dirstateutils/docket.py`
             const TREE_METADATA_SIZE: usize = 44;
             /// Make sure that size-affecting changes are made knowingly
             ///
             /// This function is never called. Naming `std::mem::transmute` with explicit
             /// source and destination types is only accepted by the compiler when both
             /// types have the same size, so changing the size of one of these on-disk
             /// structs breaks the build until the expected byte count is updated too.
             #[allow(unused)]
             fn static_assert_size_of() {
                 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
                 // 81 = 12 (marker) + 32 + 32 (parents) + 4 (data_size) + 1 (uuid_size)
                 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
                 let _ = std::mem::transmute::<Node, [u8; 43]>;
             }
             // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
             /// Fixed-size header at the start of the docket file.
             ///
             /// The data file identifier follows it; `uuid_size` gives its length.
             #[derive(BytesCast)]
             #[repr(C)]
             struct DocketHeader {
                 /// Expected to equal `V2_FORMAT_MARKER`
                 marker: [u8; V2_FORMAT_MARKER.len()],
                 /// Only the first `USED_NODE_ID_BYTES` bytes are read by `Docket::parents`
                 parent_1: [u8; STORED_NODE_ID_BYTES],
                 /// Only the first `USED_NODE_ID_BYTES` bytes are read by `Docket::parents`
                 parent_2: [u8; STORED_NODE_ID_BYTES],
+                metadata: TreeMetadata,
                 /// Counted in bytes
                 data_size: Size,
-                metadata: TreeMetadata,
                 /// Length in bytes of the identifier that follows the header
                 uuid_size: u8,
             }
             /// A parsed docket file, borrowing from the bytes it was read from.
             ///
             /// Built by `read_docket`.
             pub struct Docket<'on_disk> {
                 /// The fixed-size part at the start of the docket
                 header: &'on_disk DocketHeader,
                 /// The bytes following the header: the data file identifier
                 uuid: &'on_disk [u8],
             }
             /// Fixed-size description of the dirstate tree, stored in the docket as
             /// `DocketHeader::metadata` and `TREE_METADATA_SIZE` bytes long.
             #[derive(BytesCast)]
             #[repr(C)]
             struct TreeMetadata {
                 /// Where the nodes at the root of the tree are in the data file
                 root_nodes: ChildNodes,
                 /// Copied to and from `DirstateMap::nodes_with_entry_count`
                 nodes_with_entry_count: Size,
                 /// Copied to and from `DirstateMap::nodes_with_copy_source_count`
                 nodes_with_copy_source_count: Size,
                 /// How many bytes of this data file are not used anymore
                 unreachable_bytes: Size,
                 /// Current version always sets these bytes to zero when creating or
                 /// updating a dirstate. Future versions could assign some bits to signal
                 /// for example "the version that last wrote/updated this dirstate did so
                 /// in such and such way that can be relied on by versions that know to."
                 unused: [u8; 4],
                 /// If non-zero, a hash of ignore files that were used for some previous
                 /// run of the `status` algorithm.
                 ///
                 /// We define:
                 ///
                 /// * "Root" ignore files are `.hgignore` at the root of the repository if
                 ///   it exists, and files from `ui.ignore.*` config. This set of files is
                 ///   then sorted by the string representation of their path.
                 /// * The "expanded contents" of an ignore files is the byte string made
                 ///   by concatenating its contents with the "expanded contents" of other
                 ///   files included with `include:` or `subinclude:` files, in inclusion
                 ///   order. This definition is recursive, as included files can
                 ///   themselves include more files.
                 ///
                 /// This hash is defined as the SHA-1 of the concatenation (in sorted
                 /// order) of the "expanded contents" of each "root" ignore file.
                 /// (Note that computing this does not require actually concatenating byte
                 /// strings into contiguous memory, instead SHA-1 hashing can be done
                 /// incrementally.)
                 ignore_patterns_hash: IgnorePatternsHash,
             }
             /// On-disk representation of one file or directory of the dirstate tree.
             ///
             /// Fixed-size (43 bytes, see `static_assert_size_of`); paths and child
             /// nodes are referenced by offset into the data file.
             #[derive(BytesCast)]
             #[repr(C)]
             pub(super) struct Node {
                 full_path: PathSlice,
                 /// In bytes from `self.full_path.start`
                 base_name_start: PathSize,
                 /// No copy source if `copy_source.start` is zero
                 copy_source: OptPathSlice,
                 children: ChildNodes,
                 pub(super) descendants_with_entry_count: Size,
                 pub(super) tracked_descendants_count: Size,
                 /// Depending on the bits in `flags`:
                 ///
                 /// * If any of `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` are set, the
                 ///   node has an entry.
                 ///
                 ///   - If `HAS_MODE_AND_SIZE` is set, `data.mode` and `data.size` are
                 ///     meaningful. Otherwise they are set to zero
                 ///   - If `HAS_MTIME` is set, `data.mtime` is meaningful. Otherwise it is
                 ///     set to zero.
                 ///
                 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO`, or `HAS_MTIME`
                 ///   are set, the node does not have an entry and `data` is set to all
                 ///   zeros.
                 ///
                 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO` are set, but
                 ///   `HAS_MTIME` is set, the bytes of `data` should instead be
                 ///   interpreted as the `Timestamp` for the mtime of a cached directory.
                 ///
                 ///   The presence of this combination of flags means that at some point,
                 ///   this path in the working directory was observed:
                 ///
                 ///   - To be a directory
                 ///   - With the modification time as given by `Timestamp`
                 ///   - That timestamp was already strictly in the past when observed,
                 ///     meaning that later changes cannot happen in the same clock tick
                 ///     and must cause a different modification time (unless the system
                 ///     clock jumps back and we get unlucky, which is not impossible
                 ///     but deemed unlikely enough).
                 ///   - All direct children of this directory (as returned by
                 ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
                 ///     are ignored by ignore patterns whose hash is in
                 ///     `TreeMetadata::ignore_patterns_hash`.
                 ///
                 ///   This means that if `std::fs::symlink_metadata` later reports the
                 ///   same modification time and ignored patterns haven’t changed, a run
                 ///   of status that is not listing ignored files can skip calling
                 ///   `std::fs::read_dir` again for this directory, and iterate child
                 ///   dirstate nodes instead.
                 flags: Flags,
                 data: Entry,
             }
             // One byte of flags stored in each `Node`; the `Node::flags` documentation
             // explains how they select the interpretation of `Node::data`.
             bitflags! {
                 #[derive(BytesCast)]
                 #[repr(C)]
                 struct Flags: u8 {
                     // Any of these three bits means that the node has an entry
                     const WDIR_TRACKED = 1 << 0;
                     const P1_TRACKED = 1 << 1;
                     const P2_INFO = 1 << 2;
                     // `data.mode` and `data.size` are meaningful
                     const HAS_MODE_AND_SIZE = 1 << 3;
                     // `data.mtime` is meaningful, or `data` is a cached directory
                     // `Timestamp` when the node has no entry
                     const HAS_MTIME = 1 << 4;
                 }
             }
             /// The 12 bytes of per-node data stored in `Node::data`.
             ///
             /// Which fields are meaningful depends on `Node::flags`. The same bytes are
             /// reinterpreted as a `Timestamp` for cached directories (see
             /// `Entry::as_timestamp`).
             #[derive(BytesCast, Copy, Clone, Debug)]
             #[repr(C)]
             struct Entry {
                 mode: I32Be,
                 mtime: I32Be,
                 size: I32Be,
             }
             /// Duration since the Unix epoch
             ///
             /// Occupies the same 12 bytes as `Entry`, which is what allows
             /// `Entry::from_timestamp` and `Entry::as_timestamp` to convert between them.
             #[derive(BytesCast, Copy, Clone, PartialEq)]
             #[repr(C)]
             pub(super) struct Timestamp {
                 seconds: I64Be,
                 /// In `0 .. 1_000_000_000`.
                 ///
                 /// This timestamp is later or earlier than `(seconds, 0)` by this many
                 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
                 nanoseconds: U32Be,
             }
             /// Counted in bytes from the start of the file
             ///
             /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
             type Offset = U32Be;
             /// Counted in number of items
             ///
             /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
             type Size = U32Be;
             /// Counted in bytes
             ///
             /// NOTE: we choose not to support file names/paths longer than 64 KiB.
             type PathSize = U16Be;
             /// A contiguous sequence of `len` times `Node`, representing the child nodes
             /// of either some other node or of the repository root.
             ///
             /// Always sorted by ascending `full_path`, to allow binary search.
             /// Since nodes with the same parent nodes also have the same parent path,
             /// only the `base_name`s need to be compared during binary search.
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct ChildNodes {
                 /// Byte offset of the first node
                 start: Offset,
                 /// Number of nodes, not bytes
                 len: Size,
             }
             /// A `HgPath` of `len` bytes
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct PathSlice {
                 /// Byte offset of the first byte of the path
                 start: Offset,
                 /// Length of the path in bytes
                 len: PathSize,
             }
             /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
             type OptPathSlice = PathSlice;
             /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
             ///
             /// This should only happen if Mercurial is buggy or a repository is corrupted.
             #[derive(Debug)]
             pub struct DirstateV2ParseError;
             /// A dirstate-v2 parse error is reported as repository corruption.
             impl From<DirstateV2ParseError> for HgError {
                 fn from(_parse_error: DirstateV2ParseError) -> Self {
                     let explanation = "dirstate-v2 parse error";
                     HgError::corrupted(explanation)
                 }
             }
             impl From<DirstateV2ParseError> for crate::DirstateError {
                 fn from(error: DirstateV2ParseError) -> Self {
                     HgError::from(error).into()
                 }
             }
             impl<'on_disk> Docket<'on_disk> {
                 /// The two parents of the working directory, taken from the first
                 /// `USED_NODE_ID_BYTES` bytes of each stored node ID.
                 pub fn parents(&self) -> DirstateParents {
                     use crate::Node;
                     let header = self.header;
                     let p1_bytes = &header.parent_1[..USED_NODE_ID_BYTES];
                     let p2_bytes = &header.parent_2[..USED_NODE_ID_BYTES];
                     DirstateParents {
                         p1: Node::try_from(p1_bytes).unwrap().clone(),
                         p2: Node::try_from(p2_bytes).unwrap().clone(),
                     }
                 }

                 /// The raw bytes of the tree metadata embedded in the docket header.
                 pub fn tree_metadata(&self) -> &[u8] {
                     let metadata = &self.header.metadata;
                     metadata.as_bytes()
                 }

                 /// The size in bytes recorded in the docket for the data file.
                 pub fn data_size(&self) -> usize {
                     let size = self.header.data_size.get();
                     // This `unwrap` could only panic on a 16-bit CPU
                     usize::try_from(size).unwrap()
                 }

                 /// The name of the data file: `dirstate.` followed by the identifier.
                 ///
                 /// # Panics
                 ///
                 /// If the identifier stored in the docket is not valid UTF-8.
                 pub fn data_filename(&self) -> String {
                     let raw_name = format_bytes!(b"dirstate.{}", self.uuid);
                     String::from_utf8(raw_name).unwrap()
                 }
             }
             /// Parses the contents of a docket file.
             ///
             /// # Errors
             ///
             /// Returns `DirstateV2ParseError` if `on_disk` is too short for a
             /// `DocketHeader`, if the format marker is not `V2_FORMAT_MARKER`, or if the
             /// bytes after the header are not exactly `uuid_size` long.
             pub fn read_docket(
                 on_disk: &[u8],
             ) -> Result<Docket<'_>, DirstateV2ParseError> {
                 let (header, uuid) = match DocketHeader::from_bytes(on_disk) {
                     Ok(parts) => parts,
                     Err(_) => return Err(DirstateV2ParseError),
                 };
                 if header.marker != *V2_FORMAT_MARKER {
                     return Err(DirstateV2ParseError);
                 }
                 let expected_uuid_len = header.uuid_size as usize;
                 if uuid.len() != expected_uuid_len {
                     return Err(DirstateV2ParseError);
                 }
                 Ok(Docket { header, uuid })
             }
             /// Builds a `DirstateMap` over the data file contents `on_disk`, using the
             /// tree `metadata` bytes taken from the docket.
             ///
             /// Empty `on_disk` yields an empty map without looking at `metadata`. Only
             /// the root nodes are located here; they stay in their on-disk form.
             ///
             /// # Errors
             ///
             /// Returns `DirstateV2ParseError` if `metadata` cannot be read as a
             /// `TreeMetadata` or if the root nodes do not fit within `on_disk`.
             pub(super) fn read<'on_disk>(
                 on_disk: &'on_disk [u8],
                 metadata: &[u8],
             ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
                 if on_disk.is_empty() {
                     return Ok(DirstateMap::empty(on_disk));
                 }
                 let (meta, _rest) = TreeMetadata::from_bytes(metadata)
                     .map_err(|_| DirstateV2ParseError)?;
                 let root_nodes = read_nodes(on_disk, meta.root_nodes)?;
                 Ok(DirstateMap {
                     on_disk,
                     root: dirstate_map::ChildNodes::OnDisk(root_nodes),
                     nodes_with_entry_count: meta.nodes_with_entry_count.get(),
                     nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
                     ignore_patterns_hash: meta.ignore_patterns_hash,
                     unreachable_bytes: meta.unreachable_bytes.get(),
                 })
             }
             impl Node {
                 /// The full path of this node, borrowed from `on_disk`.
                 ///
                 /// Fails if the stored slice does not fit within `on_disk`.
                 pub(super) fn full_path<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                     read_hg_path(on_disk, self.full_path)
                 }

                 /// The index within the full path at which the base name starts.
                 ///
                 /// Fails unless that index is strictly less than the full path length.
                 pub(super) fn base_name_start<'on_disk>(
                     &self,
                 ) -> Result<usize, DirstateV2ParseError> {
                     let start = self.base_name_start.get();
                     if start < self.full_path.len.get() {
                         // `PathSize` is 16 bits wide: widening to `usize` cannot fail
                         Ok(usize::from(start))
                     } else {
                         Err(DirstateV2ParseError)
                     }
                 }

                 /// The last component of the full path, borrowed from `on_disk`.
                 pub(super) fn base_name<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                     let full_path = self.full_path(on_disk)?;
                     let base_name_start = self.base_name_start()?;
                     Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
                 }

                 /// The full path together with the position of its base name, in the
                 /// form used as key by the in-memory tree.
                 pub(super) fn path<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
                     Ok(WithBasename::from_raw_parts(
                         Cow::Borrowed(self.full_path(on_disk)?),
                         self.base_name_start()?,
                     ))
                 }

                 /// Whether a copy source is recorded: a zero `start` means "none".
                 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
                     self.copy_source.start.get() != 0
                 }

                 /// The copy source path if there is one, borrowed from `on_disk`.
                 pub(super) fn copy_source<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
                     Ok(if self.has_copy_source() {
                         Some(read_hg_path(on_disk, self.copy_source)?)
                     } else {
                         None
                     })
                 }

                 /// Whether any of `WDIR_TRACKED`, `P1_TRACKED` or `P2_INFO` is set.
                 fn has_entry(&self) -> bool {
                     self.flags.intersects(
                         Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
                     )
                 }

                 /// Interprets `data` according to `flags`: an entry, the cached mtime
                 /// of a directory, or nothing.
                 pub(super) fn node_data(
                     &self,
                 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
                     if self.has_entry() {
                         Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
                     } else if let Some(&mtime) = self.cached_directory_mtime() {
                         Ok(dirstate_map::NodeData::CachedDirectory { mtime })
                     } else {
                         Ok(dirstate_map::NodeData::None)
                     }
                 }

                 /// The cached directory mtime, present when `HAS_MTIME` is set on a
                 /// node that has no entry.
                 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
                     if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
                         Some(self.data.as_timestamp())
                     } else {
                         None
                     }
                 }

                 /// Builds a `DirstateEntry` from `flags` and `data` without checking
                 /// `has_entry()`; callers are expected to have checked it.
                 fn assume_entry(&self) -> DirstateEntry {
                     // TODO: convert through raw bits instead?
                     let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
                     let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
                     let p2_info = self.flags.contains(Flags::P2_INFO);
                     let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
                         Some((self.data.mode.into(), self.data.size.into()))
                     } else {
                         None
                     };
                     let mtime = if self.flags.contains(Flags::HAS_MTIME) {
                         Some(self.data.mtime.into())
                     } else {
                         None
                     };
                     DirstateEntry::from_v2_data(
                         wdir_tracked,
                         p1_tracked,
                         p2_info,
                         mode_size,
                         mtime,
                     )
                 }

                 /// The entry of this node, or `None` if the flags say it has none.
                 pub(super) fn entry(
                     &self,
                 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
                     if self.has_entry() {
                         Ok(Some(self.assume_entry()))
                     } else {
                         Ok(None)
                     }
                 }

                 /// The child nodes of this node, borrowed from `on_disk`.
                 pub(super) fn children<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
                     read_nodes(on_disk, self.children)
                 }

                 /// Converts this node to its in-memory form. Children are kept as a
                 /// reference to their on-disk form rather than converted.
                 pub(super) fn to_in_memory_node<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
                     Ok(dirstate_map::Node {
                         children: dirstate_map::ChildNodes::OnDisk(
                             self.children(on_disk)?,
                         ),
                         copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
                         data: self.node_data()?,
                         descendants_with_entry_count: self
                             .descendants_with_entry_count
                             .get(),
                         tracked_descendants_count: self.tracked_descendants_count.get(),
                     })
                 }
             }
             impl Entry {
                 /// Splits a `DirstateEntry` into the flags and raw data stored on disk.
                 ///
                 /// Fields that the entry does not provide are written as zero and their
                 /// `HAS_*` flag is left unset.
                 fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
                     let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
                         entry.v2_data();
                     // TODO: convert through raw flag bits instead?
                     let mut flags = Flags::empty();
                     flags.set(Flags::WDIR_TRACKED, wdir_tracked);
                     flags.set(Flags::P1_TRACKED, p1_tracked);
                     flags.set(Flags::P2_INFO, p2_info);
                     let (mode, size) = match mode_size_opt {
                         Some(mode_and_size) => {
                             flags.insert(Flags::HAS_MODE_AND_SIZE);
                             mode_and_size
                         }
                         None => (0, 0),
                     };
                     let mtime = match mtime_opt {
                         Some(known_mtime) => {
                             flags.insert(Flags::HAS_MTIME);
                             known_mtime
                         }
                         None => 0,
                     };
                     let raw_entry = Entry {
                         mode: mode.into(),
                         size: size.into(),
                         mtime: mtime.into(),
                     };
                     (flags, raw_entry)
                 }

                 /// Stores a `Timestamp` in the bytes of an `Entry`.
                 fn from_timestamp(timestamp: Timestamp) -> Self {
                     // SAFETY: both types implement the `BytesCast` trait, so this
                     // conversion could also be written with `as_bytes` and
                     // `from_bytes`. `transmute` additionally makes the compiler check
                     // that the two types have the same size, which removes the error
                     // case of `from_bytes`.
                     unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
                 }

                 /// Views the bytes of this `Entry` as a `Timestamp`.
                 fn as_timestamp(&self) -> &Timestamp {
                     // SAFETY: same reasoning as in `from_timestamp`
                     unsafe { &*(self as *const Entry as *const Timestamp) }
                 }
             }
             impl Timestamp {
                 /// The whole-seconds part of this timestamp (negative before the epoch).
                 pub fn seconds(&self) -> i64 {
                     self.seconds.get()
                 }
             }
             /// Splits a `SystemTime` into whole seconds and nanoseconds relative to the
             /// Unix epoch. For times before the epoch the seconds are negated and the
             /// nanoseconds count away from the epoch.
             impl From<SystemTime> for Timestamp {
                 fn from(system_time: SystemTime) -> Self {
                     let (seconds, nanoseconds) =
                         match system_time.duration_since(UNIX_EPOCH) {
                             Ok(after_epoch) => {
                                 let whole = after_epoch.as_secs() as i64;
                                 (whole, after_epoch.subsec_nanos())
                             }
                             Err(before_epoch) => {
                                 let distance = before_epoch.duration();
                                 let whole = -(distance.as_secs() as i64);
                                 (whole, distance.subsec_nanos())
                             }
                         };
                     Timestamp {
                         seconds: seconds.into(),
                         nanoseconds: nanoseconds.into(),
                     }
                 }
             }
             /// Rebuilds a `SystemTime` from a stored `Timestamp`.
             ///
             /// Non-negative `seconds` are added to the Unix epoch together with the
             /// nanoseconds; negative `seconds` are subtracted from it together with the
             /// nanoseconds.
             ///
             /// # Panics
             ///
             /// If the resulting time cannot be represented by `SystemTime`.
             impl From<&'_ Timestamp> for SystemTime {
                 fn from(timestamp: &'_ Timestamp) -> Self {
                     let secs = timestamp.seconds.get();
                     let nanos = timestamp.nanoseconds.get();
                     if secs >= 0 {
                         UNIX_EPOCH + Duration::new(secs as u64, nanos)
                     } else {
                         // `-secs` would overflow for `i64::MIN`, a value that can be
                         // read from disk. `wrapping_neg` followed by the cast yields
                         // the correct magnitude for every negative value.
                         let magnitude = secs.wrapping_neg() as u64;
                         UNIX_EPOCH - Duration::new(magnitude, nanos)
                     }
                 }
             }
             fn read_hg_path(
                 on_disk: &[u8],
                 slice: PathSlice,
             ) -> Result<&HgPath, DirstateV2ParseError> {
                 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
             }
             fn read_nodes(
                 on_disk: &[u8],
                 slice: ChildNodes,
             ) -> Result<&[Node], DirstateV2ParseError> {
                 read_slice(on_disk, slice.start, slice.len.get())
             }
             /// Borrows `len` items of type `T` from `on_disk`, starting at byte offset
             /// `start`.
             ///
             /// # Errors
             ///
             /// Returns `DirstateV2ParseError` if `start` is past the end of `on_disk` or
             /// if `len` items do not fit in the remaining bytes.
             fn read_slice<T, Len>(
                 on_disk: &[u8],
                 start: Offset,
                 len: Len,
             ) -> Result<&[T], DirstateV2ParseError>
             where
                 T: BytesCast,
                 Len: TryInto<usize>,
             {
                 // Either `usize::MAX` would result in "out of bounds" error since a single
                 // `&[u8]` cannot occupy the entire address space.
                 let start: usize = start.get().try_into().unwrap_or(std::usize::MAX);
                 let len: usize = len.try_into().unwrap_or(std::usize::MAX);
                 let tail = match on_disk.get(start..) {
                     Some(bytes) => bytes,
                     None => return Err(DirstateV2ParseError),
                 };
                 match T::slice_from_bytes(tail, len) {
                     Ok((items, _rest)) => Ok(items),
                     Err(_) => Err(DirstateV2ParseError),
                 }
             }
             /// Calls `f` with the full path of every node whose entry is in a tracked
             /// state, visiting each node before its children.
             ///
             /// # Errors
             ///
             /// Returns `DirstateV2ParseError` as soon as `metadata` or any visited node
             /// cannot be read; `f` may already have been called for earlier nodes.
             pub(crate) fn for_each_tracked_path<'on_disk>(
                 on_disk: &'on_disk [u8],
                 metadata: &[u8],
                 mut f: impl FnMut(&'on_disk HgPath),
             ) -> Result<(), DirstateV2ParseError> {
                 /// Depth-first walk over `nodes` and their descendants
                 fn visit<'on_disk>(
                     on_disk: &'on_disk [u8],
                     nodes: ChildNodes,
                     f: &mut impl FnMut(&'on_disk HgPath),
                 ) -> Result<(), DirstateV2ParseError> {
                     for node in read_nodes(on_disk, nodes)? {
                         let tracked = node
                             .entry()?
                             .map_or(false, |entry| entry.state().is_tracked());
                         if tracked {
                             f(node.full_path(on_disk)?);
                         }
                         visit(on_disk, node.children, f)?;
                     }
                     Ok(())
                 }

                 let (meta, _rest) = TreeMetadata::from_bytes(metadata)
                     .map_err(|_| DirstateV2ParseError)?;
                 visit(on_disk, meta.root_nodes, &mut f)
             }
             /// Serializes `dirstate_map` and returns `(data, metadata, append)`.
             ///
             /// `data` holds the new data file bytes and `metadata` the bytes of the
             /// matching `TreeMetadata`. `append` tells whether `data` should be appended
             /// to the existing data file whose content is at `dirstate_map.on_disk`
             /// (true) instead of written to a new data file (false). Appending is only
             /// chosen when `can_append` is true and the map agrees.
             pub(super) fn write(
                 dirstate_map: &mut DirstateMap,
                 can_append: bool,
             ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
                 let append = can_append && dirstate_map.write_should_append();

                 // This ignores the space for paths, and for nodes without an entry.
                 // TODO: better estimate? Skip the `Vec` and write to a file directly?
                 let node_size = std::mem::size_of::<Node>();
                 let size_guess = node_size * dirstate_map.nodes_with_entry_count as usize;

                 let mut writer = Writer {
                     dirstate_map,
                     append,
                     out: Vec::with_capacity(size_guess),
                 };
                 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

                 let meta = TreeMetadata {
                     root_nodes,
                     nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
                     nodes_with_copy_source_count: dirstate_map
                         .nodes_with_copy_source_count
                         .into(),
                     unreachable_bytes: dirstate_map.unreachable_bytes.into(),
                     // Always written as zero by this version
                     unused: [0; 4],
                     ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
                 };
                 let metadata_bytes = meta.as_bytes().to_vec();
                 Ok((writer.out, metadata_bytes, append))
             }
             /// State used while serializing a `DirstateMap` to bytes.
             struct Writer<'dmap, 'on_disk> {
                 /// The map being serialized. Its `on_disk` bytes are consulted to
                 /// resolve nodes and paths, and to compute offsets in append mode.
                 dirstate_map: &'dmap DirstateMap<'on_disk>,
                 /// When true, offsets of newly written data are shifted by the length
                 /// of `dirstate_map.on_disk`, and nodes or paths already located within
                 /// `on_disk` are referenced in place rather than written again.
                 append: bool,
                 /// Accumulates the newly serialized bytes.
                 out: Vec<u8>,
             }
             impl Writer<'_, '_> {
                 /// Serializes `nodes` together with everything they refer to (their
                 /// descendants, full paths and copy sources) into `self.out`, and
                 /// returns the location of the serialized node slice.
                 ///
                 /// In append mode, nodes that are already on disk are not written
                 /// again: their existing location is returned instead.
                 ///
                 /// # Panics
                 ///
                 /// Panics if an offset, a node count or a path length does not fit in
                 /// its on-disk integer type, or if, in append mode, `OnDisk` nodes are
                 /// not located within `self.dirstate_map.on_disk`.
                 fn write_nodes(
                     &mut self,
                     nodes: dirstate_map::ChildNodesRef,
                 ) -> Result<ChildNodes, DirstateError> {
                     // Reuse already-written nodes if possible
                     if self.append {
                         if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                             let start = self.on_disk_offset_of(nodes_slice).expect(
                                 "dirstate-v2 OnDisk nodes not found within on_disk",
                             );
                             let len = child_nodes_len_from_usize(nodes_slice.len());
                             return Ok(ChildNodes { start, len });
                         }
                     }

                     // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
                     // undefined iteration order. Sort to enable binary search in the
                     // written file.
                     let nodes = nodes.sorted();
                     let nodes_len = nodes.len();

                     // First accumulate serialized nodes in a `Vec`
                     let mut on_disk_nodes = Vec::with_capacity(nodes_len);
                     for node in nodes {
                         // Everything a node points to is written before the node
                         // itself, so that its offsets are known when it is built.
                         let children =
                             self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
                         let full_path = node.full_path(self.dirstate_map.on_disk)?;
                         let full_path = self.write_path(full_path.as_bytes());
                         let copy_source = if let Some(source) =
                             node.copy_source(self.dirstate_map.on_disk)?
                         {
                             self.write_path(source.as_bytes())
                         } else {
                             // No copy source: written as an all-zero slice
                             PathSlice {
                                 start: 0.into(),
                                 len: 0.into(),
                             }
                         };
                         on_disk_nodes.push(match node {
                             NodeRef::InMemory(path, node) => {
                                 let (flags, data) = match &node.data {
                                     dirstate_map::NodeData::Entry(entry) => {
                                         Entry::from_dirstate_entry(entry)
                                     }
                                     dirstate_map::NodeData::CachedDirectory { mtime } => {
                                         (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
                                     }
                                     dirstate_map::NodeData::None => (
                                         Flags::empty(),
                                         Entry {
                                             mode: 0.into(),
                                             size: 0.into(),
                                             mtime: 0.into(),
                                         },
                                     ),
                                 };
                                 Node {
                                     children,
                                     copy_source,
                                     full_path,
                                     base_name_start: u16::try_from(path.base_name_start())
                                         // Could only panic for paths over 64 KiB
                                         .expect("dirstate-v2 path length overflow")
                                         .into(),
                                     descendants_with_entry_count: node
                                         .descendants_with_entry_count
                                         .into(),
                                     tracked_descendants_count: node
                                         .tracked_descendants_count
                                         .into(),
                                     flags,
                                     data,
                                 }
                             }
                             // An on-disk node keeps all of its other fields; only the
                             // three slices computed above are replaced.
                             NodeRef::OnDisk(node) => Node {
                                 children,
                                 copy_source,
                                 full_path,
                                 ..*node
                             },
                         })
                     }

                     // … so we can write them contiguously, after writing everything else
                     // they refer to.
                     let start = self.current_offset();
                     let len = child_nodes_len_from_usize(nodes_len);
                     self.out.extend_from_slice(on_disk_nodes.as_bytes());
                     Ok(ChildNodes { start, len })
                 }

                 /// If the given slice of items is within `on_disk`, returns its offset
                 /// from the start of `on_disk`.
                 ///
                 /// Containment is decided by comparing memory addresses, not contents.
                 ///
                 /// # Panics
                 ///
                 /// Panics if the resulting offset does not fit in an `Offset`.
                 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
                 where
                     T: BytesCast,
                 {
                     /// Address range of `slice`, from its first byte to one past its
                     /// last byte. The end is inclusive so that a slice ending exactly
                     /// at the end of `on_disk` (or an empty slice) still counts as
                     /// contained.
                     fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
                         let start = slice.as_ptr() as usize;
                         let end = start + slice.len();
                         start..=end
                     }

                     let slice_addresses = address_range(slice.as_bytes());
                     let on_disk_addresses = address_range(self.dirstate_map.on_disk);
                     if on_disk_addresses.contains(slice_addresses.start())
                         && on_disk_addresses.contains(slice_addresses.end())
                     {
                         let offset = slice_addresses.start() - on_disk_addresses.start();
                         Some(offset_from_usize(offset))
                     } else {
                         None
                     }
                 }

                 /// Returns the offset at which the next byte pushed to `self.out` will
                 /// be located.
                 ///
                 /// In append mode the new bytes follow the existing `on_disk` content,
                 /// so its length is added to the position within `self.out`.
                 ///
                 /// # Panics
                 ///
                 /// Panics if the offset does not fit in an `Offset`.
                 fn current_offset(&self) -> Offset {
                     let mut offset = self.out.len();
                     if self.append {
                         offset += self.dirstate_map.on_disk.len()
                     }
                     offset_from_usize(offset)
                 }

                 /// Writes the path bytes `slice` and returns where they are located.
                 ///
                 /// In append mode, a path that already lives within `on_disk` is
                 /// referenced in place instead of being copied into `self.out`.
                 ///
                 /// # Panics
                 ///
                 /// Panics if the path length does not fit in a `PathSize`, or if the
                 /// offset does not fit in an `Offset`.
                 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
                     // Validate the length first, so nothing is written for a path
                     // that cannot be represented.
                     let len = path_len_from_usize(slice.len());
                     // Reuse an already-written path if possible
                     if self.append {
                         if let Some(start) = self.on_disk_offset_of(slice) {
                             return PathSlice { start, len };
                         }
                     }
                     let start = self.current_offset();
                     self.out.extend_from_slice(slice);
                     PathSlice { start, len }
                 }
             }
             /// Converts a byte offset to its on-disk `Offset` representation.
             ///
             /// # Panics
             ///
             /// Panics if `x` does not fit in a `u32`, which could only happen for a
             /// dirstate file larger than 4 GiB.
             fn offset_from_usize(x: usize) -> Offset {
                 let narrowed = u32::try_from(x).expect("dirstate-v2 offset overflow");
                 narrowed.into()
             }
             /// Converts a number of child nodes to its on-disk `Size` representation.
             ///
             /// # Panics
             ///
             /// Panics if `x` does not fit in a `u32`, which could only happen with over
             /// 4 billion nodes.
             fn child_nodes_len_from_usize(x: usize) -> Size {
                 let narrowed =
                     u32::try_from(x).expect("dirstate-v2 slice length overflow");
                 narrowed.into()
             }
             /// Converts a path length in bytes to its on-disk `PathSize` representation.
             ///
             /// # Panics
             ///
             /// Panics if `x` does not fit in a `u16`, which could only happen for paths
             /// over 64 KiB.
             fn path_len_from_usize(x: usize) -> PathSize {
                 let narrowed =
                     u16::try_from(x).expect("dirstate-v2 path length overflow");
                 narrowed.into()
             }