upstream/mercurial-mirror Commit - r48484:852262e2

//! The "version 2" disk representation of the dirstate
//!
//! # File format
//!
//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
//! with a fixed-sized header whose layout is defined by the `DocketHeader`
//! struct, followed by the data file identifier.
//!
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
//! file may be longer than the size given in the docket, but not shorter. Only
//! the start of the data file up to the given size is considered. The
//! fixed-size "root" of the dirstate tree whose layout is defined by the
//! `Root` struct is found at the end of that slice of data.
//!
//! Its `root_nodes` field contains the slice (offset and length) to
//! the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own child
//! nodes (if any) for nested files and directories.

20

21

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};

21

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};

22

use crate::dirstate_tree::path_with_basename::WithBasename;

22

use crate::dirstate_tree::path_with_basename::WithBasename;

23

use crate::errors::HgError;

23

use crate::errors::HgError;

24

use crate::utils::hg_path::HgPath;

24

use crate::utils::hg_path::HgPath;

25

use crate::DirstateEntry;

25

use crate::DirstateEntry;

26

use crate::DirstateError;

26

use crate::DirstateError;

27

use crate::DirstateParents;

27

use crate::DirstateParents;

28

use crate::EntryState;

28

use crate::EntryState;

29

use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};

29

use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};

30

use bytes_cast::BytesCast;

30

use bytes_cast::BytesCast;

31

use format_bytes::format_bytes;

31

use format_bytes::format_bytes;

32

use std::borrow::Cow;

32

use std::borrow::Cow;

33

use std::convert::{TryFrom, TryInto};

33

use std::convert::{TryFrom, TryInto};

34

use std::time::{Duration, SystemTime, UNIX_EPOCH};

34

use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

46

47

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;

47

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;

48

pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

48

pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match the constant of the same name in
/// `mercurial/dirstateutils/docket.py`
const TREE_METADATA_SIZE: usize = 44;

53

54

/// Make sure that size-affecting changes are made knowingly

54

/// Make sure that size-affecting changes are made knowingly

55

#[allow(unused)]

55

#[allow(unused)]

56

fn static_assert_size_of() {

56

fn static_assert_size_of() {

57

let _ = std::mem::transmute::<DocketHeader, [u8; 121]>;

58

let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;

57

let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;

58

let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;

59

let _ = std::mem::transmute::<Node, [u8; 43]>;

59

let _ = std::mem::transmute::<Node, [u8; 43]>;

60

}

60

}

61

62

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`

62

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`

63

#[derive(BytesCast)]

63

#[derive(BytesCast)]

64

#[repr(C)]

64

#[repr(C)]

65

struct DocketHeader {

65

struct DocketHeader {

66

marker: [u8; V2_FORMAT_MARKER.len()],

66

marker: [u8; V2_FORMAT_MARKER.len()],

67

parent_1: [u8; STORED_NODE_ID_BYTES],

67

parent_1: [u8; STORED_NODE_ID_BYTES],

68

parent_2: [u8; STORED_NODE_ID_BYTES],

68

parent_2: [u8; STORED_NODE_ID_BYTES],

69

70

/// Counted in bytes

70

/// Counted in bytes

71

data_size: Size,

71

data_size: Size,

72

73

metadata: TreeMetadata,

73

metadata: TreeMetadata,

74

75

uuid_size: u8,

75

uuid_size: u8,

76

}

76

}

77

78

pub struct Docket<'on_disk> {

78

pub struct Docket<'on_disk> {

79

header: &'on_disk DocketHeader,

79

header: &'on_disk DocketHeader,

80

uuid: &'on_disk [u8],

80

uuid: &'on_disk [u8],

81

}

81

}

82

83

#[derive(BytesCast)]

83

#[derive(BytesCast)]

84

#[repr(C)]

84

#[repr(C)]

85

struct TreeMetadata {

85

struct TreeMetadata {

86

root_nodes: ChildNodes,

86

root_nodes: ChildNodes,

87

nodes_with_entry_count: Size,

87

nodes_with_entry_count: Size,

88

nodes_with_copy_source_count: Size,

88

nodes_with_copy_source_count: Size,

89

90

/// How many bytes of this data file are not used anymore

90

/// How many bytes of this data file are not used anymore

91

unreachable_bytes: Size,

91

unreachable_bytes: Size,

92

93

/// Current version always sets these bytes to zero when creating or

94

/// updating a dirstate. Future versions could assign some bits to signal

95

/// for example "the version that last wrote/updated this dirstate did so

96

/// in such and such way that can be relied on by versions that know to."

97

unused: [u8; 4],

98

93

/// If non-zero, a hash of ignore files that were used for some previous

99

/// If non-zero, a hash of ignore files that were used for some previous

94

/// run of the `status` algorithm.

100

/// run of the `status` algorithm.

95

///

101

///

96

/// We define:

102

/// We define:

97

///

103

///

98

/// * "Root" ignore files are `.hgignore` at the root of the repository if

104

/// * "Root" ignore files are `.hgignore` at the root of the repository if

99

/// it exists, and files from `ui.ignore.*` config. This set of files is

105

/// it exists, and files from `ui.ignore.*` config. This set of files is

100

/// then sorted by the string representation of their path.

106

/// then sorted by the string representation of their path.

101

/// * The "expanded contents" of an ignore files is the byte string made

107

/// * The "expanded contents" of an ignore files is the byte string made

102

/// by concatenating its contents with the "expanded contents" of other

108

/// by concatenating its contents with the "expanded contents" of other

103

/// files included with `include:` or `subinclude:` files, in inclusion

109

/// files included with `include:` or `subinclude:` files, in inclusion

104

/// order. This definition is recursive, as included files can

110

/// order. This definition is recursive, as included files can

105

/// themselves include more files.

111

/// themselves include more files.

106

///

112

///

107

/// This hash is defined as the SHA-1 of the concatenation (in sorted

113

/// This hash is defined as the SHA-1 of the concatenation (in sorted

108

/// order) of the "expanded contents" of each "root" ignore file.

114

/// order) of the "expanded contents" of each "root" ignore file.

109

/// (Note that computing this does not require actually concatenating byte

115

/// (Note that computing this does not require actually concatenating byte

110

/// strings into contiguous memory, instead SHA-1 hashing can be done

116

/// strings into contiguous memory, instead SHA-1 hashing can be done

111

/// incrementally.)

117

/// incrementally.)

112

ignore_patterns_hash: IgnorePatternsHash,

118

ignore_patterns_hash: IgnorePatternsHash,

113

}

119

}

114

120

115

#[derive(BytesCast)]

121

#[derive(BytesCast)]

116

#[repr(C)]

122

#[repr(C)]

117

pub(super) struct Node {

123

pub(super) struct Node {

118

full_path: PathSlice,

124

full_path: PathSlice,

119

125

120

/// In bytes from `self.full_path.start`

126

/// In bytes from `self.full_path.start`

121

base_name_start: PathSize,

127

base_name_start: PathSize,

122

128

123

copy_source: OptPathSlice,

129

copy_source: OptPathSlice,

124

children: ChildNodes,

130

children: ChildNodes,

125

pub(super) descendants_with_entry_count: Size,

131

pub(super) descendants_with_entry_count: Size,

126

pub(super) tracked_descendants_count: Size,

132

pub(super) tracked_descendants_count: Size,

127

133

128

/// Depending on the value of `state`:

134

/// Depending on the value of `state`:

129

///

135

///

130

/// * A null byte: `data` is not used.

136

/// * A null byte: `data` is not used.

131

///

137

///

132

/// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together

138

/// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together

133

/// represent a dirstate entry like in the v1 format.

139

/// represent a dirstate entry like in the v1 format.

134

///

140

///

135

/// * A `d` ASCII byte: the bytes of `data` should instead be interpreted

141

/// * A `d` ASCII byte: the bytes of `data` should instead be interpreted

136

/// as the `Timestamp` for the mtime of a cached directory.

142

/// as the `Timestamp` for the mtime of a cached directory.

137

///

143

///

138

/// The presence of this state means that at some point, this path in

144

/// The presence of this state means that at some point, this path in

139

/// the working directory was observed:

145

/// the working directory was observed:

140

///

146

///

141

/// - To be a directory

147

/// - To be a directory

142

/// - With the modification time as given by `Timestamp`

148

/// - With the modification time as given by `Timestamp`

143

/// - That timestamp was already strictly in the past when observed,

149

/// - That timestamp was already strictly in the past when observed,

144

/// meaning that later changes cannot happen in the same clock tick

150

/// meaning that later changes cannot happen in the same clock tick

145

/// and must cause a different modification time (unless the system

151

/// and must cause a different modification time (unless the system

146

/// clock jumps back and we get unlucky, which is not impossible but

152

/// clock jumps back and we get unlucky, which is not impossible but

147

/// but deemed unlikely enough).

153

/// but deemed unlikely enough).

148

/// - All direct children of this directory (as returned by

154

/// - All direct children of this directory (as returned by

149

/// `std::fs::read_dir`) either have a corresponding dirstate node, or

155

/// `std::fs::read_dir`) either have a corresponding dirstate node, or

150

/// are ignored by ignore patterns whose hash is in

156

/// are ignored by ignore patterns whose hash is in

151

/// `TreeMetadata::ignore_patterns_hash`.

157

/// `TreeMetadata::ignore_patterns_hash`.

152

///

158

///

153

/// This means that if `std::fs::symlink_metadata` later reports the

159

/// This means that if `std::fs::symlink_metadata` later reports the

154

/// same modification time and ignored patterns haven’t changed, a run

160

/// same modification time and ignored patterns haven’t changed, a run

155

/// of status that is not listing ignored files can skip calling

161

/// of status that is not listing ignored files can skip calling

156

/// `std::fs::read_dir` again for this directory, iterate child

162

/// `std::fs::read_dir` again for this directory, iterate child

157

/// dirstate nodes instead.

163

/// dirstate nodes instead.

158

state: u8,

164

state: u8,

159

data: Entry,

165

data: Entry,

160

}

166

}

161

167

162

#[derive(BytesCast, Copy, Clone)]

168

#[derive(BytesCast, Copy, Clone)]

163

#[repr(C)]

169

#[repr(C)]

164

struct Entry {

170

struct Entry {

165

mode: I32Be,

171

mode: I32Be,

166

mtime: I32Be,

172

mtime: I32Be,

167

size: I32Be,

173

size: I32Be,

168

}

174

}

169

175

170

/// Duration since the Unix epoch

176

/// Duration since the Unix epoch

171

#[derive(BytesCast, Copy, Clone, PartialEq)]

177

#[derive(BytesCast, Copy, Clone, PartialEq)]

172

#[repr(C)]

178

#[repr(C)]

173

pub(super) struct Timestamp {

179

pub(super) struct Timestamp {

174

seconds: I64Be,

180

seconds: I64Be,

175

181

176

/// In `0 .. 1_000_000_000`.

182

/// In `0 .. 1_000_000_000`.

177

///

183

///

178

/// This timestamp is later or earlier than `(seconds, 0)` by this many

184

/// This timestamp is later or earlier than `(seconds, 0)` by this many

179

/// nanoseconds, if `seconds` is non-negative or negative, respectively.

185

/// nanoseconds, if `seconds` is non-negative or negative, respectively.

180

nanoseconds: U32Be,

186

nanoseconds: U32Be,

181

}

187

}

182

188

183

/// Counted in bytes from the start of the file

189

/// Counted in bytes from the start of the file

184

///

190

///

185

/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.

191

/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.

186

type Offset = U32Be;

192

type Offset = U32Be;

187

193

188

/// Counted in number of items

194

/// Counted in number of items

189

///

195

///

190

/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.

196

/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.

191

type Size = U32Be;

197

type Size = U32Be;

192

198

193

/// Counted in bytes

199

/// Counted in bytes

194

///

200

///

195

/// NOTE: we choose not to support file names/paths longer than 64 KiB.

201

/// NOTE: we choose not to support file names/paths longer than 64 KiB.

196

type PathSize = U16Be;

202

type PathSize = U16Be;

197

203

198

/// A contiguous sequence of `len` times `Node`, representing the child nodes

204

/// A contiguous sequence of `len` times `Node`, representing the child nodes

199

/// of either some other node or of the repository root.

205

/// of either some other node or of the repository root.

200

///

206

///

201

/// Always sorted by ascending `full_path`, to allow binary search.

207

/// Always sorted by ascending `full_path`, to allow binary search.

202

/// Since nodes with the same parent nodes also have the same parent path,

208

/// Since nodes with the same parent nodes also have the same parent path,

203

/// only the `base_name`s need to be compared during binary search.

209

/// only the `base_name`s need to be compared during binary search.

204

#[derive(BytesCast, Copy, Clone)]

210

#[derive(BytesCast, Copy, Clone)]

205

#[repr(C)]

211

#[repr(C)]

206

struct ChildNodes {

212

struct ChildNodes {

207

start: Offset,

213

start: Offset,

208

len: Size,

214

len: Size,

209

}

215

}

210

216

211

/// A `HgPath` of `len` bytes

217

/// A `HgPath` of `len` bytes

212

#[derive(BytesCast, Copy, Clone)]

218

#[derive(BytesCast, Copy, Clone)]

213

#[repr(C)]

219

#[repr(C)]

214

struct PathSlice {

220

struct PathSlice {

215

start: Offset,

221

start: Offset,

216

len: PathSize,

222

len: PathSize,

217

}

223

}

218

224

219

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes

225

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes

220

type OptPathSlice = PathSlice;

226

type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

227

233

228

impl From<DirstateV2ParseError> for HgError {

234

impl From<DirstateV2ParseError> for HgError {

229

fn from(_: DirstateV2ParseError) -> Self {

235

fn from(_: DirstateV2ParseError) -> Self {

230

HgError::corrupted("dirstate-v2 parse error")

236

HgError::corrupted("dirstate-v2 parse error")

231

}

237

}

232

}

238

}

233

239

234

impl From<DirstateV2ParseError> for crate::DirstateError {

240

impl From<DirstateV2ParseError> for crate::DirstateError {

235

fn from(error: DirstateV2ParseError) -> Self {

241

fn from(error: DirstateV2ParseError) -> Self {

236

HgError::from(error).into()

242

HgError::from(error).into()

237

}

243

}

238

}

244

}

239

245

240

impl<'on_disk> Docket<'on_disk> {

246

impl<'on_disk> Docket<'on_disk> {

241

pub fn parents(&self) -> DirstateParents {

247

pub fn parents(&self) -> DirstateParents {

242

use crate::Node;

248

use crate::Node;

243

let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])

249

let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])

244

.unwrap()

250

.unwrap()

245

.clone();

251

.clone();

246

let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])

252

let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])

247

.unwrap()

253

.unwrap()

248

.clone();

254

.clone();

249

DirstateParents { p1, p2 }

255

DirstateParents { p1, p2 }

250

}

256

}

251

257

252

pub fn tree_metadata(&self) -> &[u8] {

258

pub fn tree_metadata(&self) -> &[u8] {

253

self.header.metadata.as_bytes()

259

self.header.metadata.as_bytes()

254

}

260

}

255

261

256

pub fn data_size(&self) -> usize {

262

pub fn data_size(&self) -> usize {

257

// This `unwrap` could only panic on a 16-bit CPU

263

// This `unwrap` could only panic on a 16-bit CPU

258

self.header.data_size.get().try_into().unwrap()

264

self.header.data_size.get().try_into().unwrap()

259

}

265

}

260

266

261

pub fn data_filename(&self) -> String {

267

pub fn data_filename(&self) -> String {

262

String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()

268

String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()

263

}

269

}

264

}

270

}

265

271

266

pub fn read_docket(

272

pub fn read_docket(

267

on_disk: &[u8],

273

on_disk: &[u8],

268

) -> Result<Docket<'_>, DirstateV2ParseError> {

274

) -> Result<Docket<'_>, DirstateV2ParseError> {

269

let (header, uuid) =

275

let (header, uuid) =

270

DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;

276

DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;

271

let uuid_size = header.uuid_size as usize;

277

let uuid_size = header.uuid_size as usize;

272

if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {

278

if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {

273

Ok(Docket { header, uuid })

279

Ok(Docket { header, uuid })

274

} else {

280

} else {

275

Err(DirstateV2ParseError)

281

Err(DirstateV2ParseError)

276

}

282

}

277

}

283

}

278

284

279

pub(super) fn read<'on_disk>(

285

pub(super) fn read<'on_disk>(

280

on_disk: &'on_disk [u8],

286

on_disk: &'on_disk [u8],

281

metadata: &[u8],

287

metadata: &[u8],

282

) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {

288

) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {

283

if on_disk.is_empty() {

289

if on_disk.is_empty() {

284

return Ok(DirstateMap::empty(on_disk));

290

return Ok(DirstateMap::empty(on_disk));

285

}

291

}

286

let (meta, _) = TreeMetadata::from_bytes(metadata)

292

let (meta, _) = TreeMetadata::from_bytes(metadata)

287

.map_err(|_| DirstateV2ParseError)?;

293

.map_err(|_| DirstateV2ParseError)?;

288

let dirstate_map = DirstateMap {

294

let dirstate_map = DirstateMap {

289

on_disk,

295

on_disk,

290

root: dirstate_map::ChildNodes::OnDisk(read_nodes(

296

root: dirstate_map::ChildNodes::OnDisk(read_nodes(

291

on_disk,

297

on_disk,

292

meta.root_nodes,

298

meta.root_nodes,

293

)?),

299

)?),

294

nodes_with_entry_count: meta.nodes_with_entry_count.get(),

300

nodes_with_entry_count: meta.nodes_with_entry_count.get(),

295

nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),

301

nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),

296

ignore_patterns_hash: meta.ignore_patterns_hash,

302

ignore_patterns_hash: meta.ignore_patterns_hash,

297

unreachable_bytes: meta.unreachable_bytes.get(),

303

unreachable_bytes: meta.unreachable_bytes.get(),

298

};

304

};

299

Ok(dirstate_map)

305

Ok(dirstate_map)

300

}

306

}

301

307

302

impl Node {

308

impl Node {

303

pub(super) fn full_path<'on_disk>(

309

pub(super) fn full_path<'on_disk>(

304

&self,

310

&self,

305

on_disk: &'on_disk [u8],

311

on_disk: &'on_disk [u8],

306

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

312

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

307

read_hg_path(on_disk, self.full_path)

313

read_hg_path(on_disk, self.full_path)

308

}

314

}

309

315

310

pub(super) fn base_name_start<'on_disk>(

316

pub(super) fn base_name_start<'on_disk>(

311

&self,

317

&self,

312

) -> Result<usize, DirstateV2ParseError> {

318

) -> Result<usize, DirstateV2ParseError> {

313

let start = self.base_name_start.get();

319

let start = self.base_name_start.get();

314

if start < self.full_path.len.get() {

320

if start < self.full_path.len.get() {

315

let start = usize::try_from(start)

321

let start = usize::try_from(start)

316

// u32 -> usize, could only panic on a 16-bit CPU

322

// u32 -> usize, could only panic on a 16-bit CPU

317

.expect("dirstate-v2 base_name_start out of bounds");

323

.expect("dirstate-v2 base_name_start out of bounds");

318

Ok(start)

324

Ok(start)

319

} else {

325

} else {

320

Err(DirstateV2ParseError)

326

Err(DirstateV2ParseError)

321

}

327

}

322

}

328

}

323

329

324

pub(super) fn base_name<'on_disk>(

330

pub(super) fn base_name<'on_disk>(

325

&self,

331

&self,

326

on_disk: &'on_disk [u8],

332

on_disk: &'on_disk [u8],

327

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

333

) -> Result<&'on_disk HgPath, DirstateV2ParseError> {

328

let full_path = self.full_path(on_disk)?;

334

let full_path = self.full_path(on_disk)?;

329

let base_name_start = self.base_name_start()?;

335

let base_name_start = self.base_name_start()?;

330

Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))

336

Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))

331

}

337

}

332

338

333

pub(super) fn path<'on_disk>(

339

pub(super) fn path<'on_disk>(

334

&self,

340

&self,

335

on_disk: &'on_disk [u8],

341

on_disk: &'on_disk [u8],

336

) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {

342

) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {

337

Ok(WithBasename::from_raw_parts(

343

Ok(WithBasename::from_raw_parts(

338

Cow::Borrowed(self.full_path(on_disk)?),

344

Cow::Borrowed(self.full_path(on_disk)?),

339

self.base_name_start()?,

345

self.base_name_start()?,

340

))

346

))

341

}

347

}

342

348

343

pub(super) fn has_copy_source<'on_disk>(&self) -> bool {

349

pub(super) fn has_copy_source<'on_disk>(&self) -> bool {

344

self.copy_source.start.get() != 0

350

self.copy_source.start.get() != 0

345

}

351

}

346

352

347

pub(super) fn copy_source<'on_disk>(

353

pub(super) fn copy_source<'on_disk>(

348

&self,

354

&self,

349

on_disk: &'on_disk [u8],

355

on_disk: &'on_disk [u8],

350

) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {

356

) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {

351

Ok(if self.has_copy_source() {

357

Ok(if self.has_copy_source() {

352

Some(read_hg_path(on_disk, self.copy_source)?)

358

Some(read_hg_path(on_disk, self.copy_source)?)

353

} else {

359

} else {

354

None

360

None

355

})

361

})

356

}

362

}

357

363

358

pub(super) fn node_data(

364

pub(super) fn node_data(

359

&self,

365

&self,

360

) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {

366

) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {

361

let entry = |state| {

367

let entry = |state| {

362

dirstate_map::NodeData::Entry(self.entry_with_given_state(state))

368

dirstate_map::NodeData::Entry(self.entry_with_given_state(state))

363

};

369

};

364

370

365

match self.state {

371

match self.state {

366

b'\0' => Ok(dirstate_map::NodeData::None),

372

b'\0' => Ok(dirstate_map::NodeData::None),

367

b'd' => Ok(dirstate_map::NodeData::CachedDirectory {

373

b'd' => Ok(dirstate_map::NodeData::CachedDirectory {

368

mtime: *self.data.as_timestamp(),

374

mtime: *self.data.as_timestamp(),

369

}),

375

}),

370

b'n' => Ok(entry(EntryState::Normal)),

376

b'n' => Ok(entry(EntryState::Normal)),

371

b'a' => Ok(entry(EntryState::Added)),

377

b'a' => Ok(entry(EntryState::Added)),

372

b'r' => Ok(entry(EntryState::Removed)),

378

b'r' => Ok(entry(EntryState::Removed)),

373

b'm' => Ok(entry(EntryState::Merged)),

379

b'm' => Ok(entry(EntryState::Merged)),

374

_ => Err(DirstateV2ParseError),

380

_ => Err(DirstateV2ParseError),

375

}

381

}

376

}

382

}

377

383

378

pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {

384

pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {

379

if self.state == b'd' {

385

if self.state == b'd' {

380

Some(self.data.as_timestamp())

386

Some(self.data.as_timestamp())

381

} else {

387

} else {

382

None

388

None

383

}

389

}

384

}

390

}

385

391

386

pub(super) fn state(

392

pub(super) fn state(

387

&self,

393

&self,

388

) -> Result<Option<EntryState>, DirstateV2ParseError> {

394

) -> Result<Option<EntryState>, DirstateV2ParseError> {

389

match self.state {

395

match self.state {

390

b'\0' | b'd' => Ok(None),

396

b'\0' | b'd' => Ok(None),

391

b'n' => Ok(Some(EntryState::Normal)),

397

b'n' => Ok(Some(EntryState::Normal)),

392

b'a' => Ok(Some(EntryState::Added)),

398

b'a' => Ok(Some(EntryState::Added)),

393

b'r' => Ok(Some(EntryState::Removed)),

399

b'r' => Ok(Some(EntryState::Removed)),

394

b'm' => Ok(Some(EntryState::Merged)),

400

b'm' => Ok(Some(EntryState::Merged)),

395

_ => Err(DirstateV2ParseError),

401

_ => Err(DirstateV2ParseError),

396

}

402

}

397

}

403

}

398

404

399

fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {

405

fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {

400

DirstateEntry {

406

DirstateEntry {

401

state,

407

state,

402

mode: self.data.mode.get(),

408

mode: self.data.mode.get(),

403

mtime: self.data.mtime.get(),

409

mtime: self.data.mtime.get(),

404

size: self.data.size.get(),

410

size: self.data.size.get(),

405

}

411

}

406

}

412

}

407

413

408

pub(super) fn entry(

414

pub(super) fn entry(

409

&self,

415

&self,

410

) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {

416

) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {

411

Ok(self

417

Ok(self

412

.state()?

418

.state()?

413

.map(|state| self.entry_with_given_state(state)))

419

.map(|state| self.entry_with_given_state(state)))

414

}

420

}

415

421

416

pub(super) fn children<'on_disk>(

422

pub(super) fn children<'on_disk>(

417

&self,

423

&self,

418

on_disk: &'on_disk [u8],

424

on_disk: &'on_disk [u8],

419

) -> Result<&'on_disk [Node], DirstateV2ParseError> {

425

) -> Result<&'on_disk [Node], DirstateV2ParseError> {

420

read_nodes(on_disk, self.children)

426

read_nodes(on_disk, self.children)

421

}

427

}

422

428

423

pub(super) fn to_in_memory_node<'on_disk>(

429

pub(super) fn to_in_memory_node<'on_disk>(

424

&self,

430

&self,

425

on_disk: &'on_disk [u8],

431

on_disk: &'on_disk [u8],

426

) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {

432

) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {

427

Ok(dirstate_map::Node {

433

Ok(dirstate_map::Node {

428

children: dirstate_map::ChildNodes::OnDisk(

434

children: dirstate_map::ChildNodes::OnDisk(

429

self.children(on_disk)?,

435

self.children(on_disk)?,

430

),

436

),

431

copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),

437

copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),

432

data: self.node_data()?,

438

data: self.node_data()?,

433

descendants_with_entry_count: self

439

descendants_with_entry_count: self

434

.descendants_with_entry_count

440

.descendants_with_entry_count

435

.get(),

441

.get(),

436

tracked_descendants_count: self.tracked_descendants_count.get(),

442

tracked_descendants_count: self.tracked_descendants_count.get(),

437

})

443

})

438

}

444

}

439

}

445

}

440

446

441

impl Entry {

447

impl Entry {

442

fn from_timestamp(timestamp: Timestamp) -> Self {

448

fn from_timestamp(timestamp: Timestamp) -> Self {

443

// Safety: both types implement the `ByteCast` trait, so we could

449

// Safety: both types implement the `ByteCast` trait, so we could

444

// safely use `as_bytes` and `from_bytes` to do this conversion. Using

450

// safely use `as_bytes` and `from_bytes` to do this conversion. Using

445

// `transmute` instead makes the compiler check that the two types

451

// `transmute` instead makes the compiler check that the two types

446

// have the same size, which eliminates the error case of

452

// have the same size, which eliminates the error case of

447

// `from_bytes`.

453

// `from_bytes`.

448

unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }

454

unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }

449

}

455

}

450

456

451

fn as_timestamp(&self) -> &Timestamp {

457

fn as_timestamp(&self) -> &Timestamp {

452

// Safety: same as above in `from_timestamp`

458

// Safety: same as above in `from_timestamp`

453

unsafe { &*(self as *const Entry as *const Timestamp) }

459

unsafe { &*(self as *const Entry as *const Timestamp) }

454

}

460

}

455

}

461

}

456

462

457

impl Timestamp {

463

impl Timestamp {

458

pub fn seconds(&self) -> i64 {

464

pub fn seconds(&self) -> i64 {

459

self.seconds.get()

465

self.seconds.get()

460

}

466

}

461

}

467

}

462

468

463

impl From<SystemTime> for Timestamp {

469

impl From<SystemTime> for Timestamp {

464

fn from(system_time: SystemTime) -> Self {

470

fn from(system_time: SystemTime) -> Self {

465

let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {

471

let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {

466

Ok(duration) => {

472

Ok(duration) => {

467

(duration.as_secs() as i64, duration.subsec_nanos())

473

(duration.as_secs() as i64, duration.subsec_nanos())

468

}

474

}

469

Err(error) => {

475

Err(error) => {

470

let negative = error.duration();

476

let negative = error.duration();

471

(-(negative.as_secs() as i64), negative.subsec_nanos())

477

(-(negative.as_secs() as i64), negative.subsec_nanos())

472

}

478

}

473

};

479

};

474

Timestamp {

480

Timestamp {

475

seconds: secs.into(),

481

seconds: secs.into(),

476

nanoseconds: nanos.into(),

482

nanoseconds: nanos.into(),

477

}

483

}

478

}

484

}

479

}

485

}

480

486

481

impl From<&'_ Timestamp> for SystemTime {

487

impl From<&'_ Timestamp> for SystemTime {

482

fn from(timestamp: &'_ Timestamp) -> Self {

488

fn from(timestamp: &'_ Timestamp) -> Self {

483

let secs = timestamp.seconds.get();

489

let secs = timestamp.seconds.get();

484

let nanos = timestamp.nanoseconds.get();

490

let nanos = timestamp.nanoseconds.get();

485

if secs >= 0 {

491

if secs >= 0 {

486

UNIX_EPOCH + Duration::new(secs as u64, nanos)

492

UNIX_EPOCH + Duration::new(secs as u64, nanos)

487

} else {

493

} else {

488

UNIX_EPOCH - Duration::new((-secs) as u64, nanos)

494

UNIX_EPOCH - Duration::new((-secs) as u64, nanos)

489

}

495

}

490

}

496

}

491

}

497

}

492

498

493

fn read_hg_path(

499

fn read_hg_path(

494

on_disk: &[u8],

500

on_disk: &[u8],

495

slice: PathSlice,

501

slice: PathSlice,

496

) -> Result<&HgPath, DirstateV2ParseError> {

502

) -> Result<&HgPath, DirstateV2ParseError> {

497

read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)

503

read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)

498

}

504

}

499

505

500

fn read_nodes(

506

fn read_nodes(

501

on_disk: &[u8],

507

on_disk: &[u8],

502

slice: ChildNodes,

508

slice: ChildNodes,

503

) -> Result<&[Node], DirstateV2ParseError> {

509

) -> Result<&[Node], DirstateV2ParseError> {

504

read_slice(on_disk, slice.start, slice.len.get())

510

read_slice(on_disk, slice.start, slice.len.get())

505

}

511

}

506

512

507

fn read_slice<T, Len>(

513

fn read_slice<T, Len>(

508

on_disk: &[u8],

514

on_disk: &[u8],

509

start: Offset,

515

start: Offset,

510

len: Len,

516

len: Len,

511

) -> Result<&[T], DirstateV2ParseError>

517

) -> Result<&[T], DirstateV2ParseError>

512

where

518

where

513

T: BytesCast,

519

T: BytesCast,

514

Len: TryInto<usize>,

520

Len: TryInto<usize>,

515

{

521

{

516

// Either `usize::MAX` would result in "out of bounds" error since a single

522

// Either `usize::MAX` would result in "out of bounds" error since a single

517

// `&[u8]` cannot occupy the entire addess space.

523

// `&[u8]` cannot occupy the entire addess space.

518

let start = start.get().try_into().unwrap_or(std::usize::MAX);

524

let start = start.get().try_into().unwrap_or(std::usize::MAX);

519

let len = len.try_into().unwrap_or(std::usize::MAX);

525

let len = len.try_into().unwrap_or(std::usize::MAX);

520

on_disk

526

on_disk

521

.get(start..)

527

.get(start..)

522

.and_then(|bytes| T::slice_from_bytes(bytes, len).ok())

528

.and_then(|bytes| T::slice_from_bytes(bytes, len).ok())

523

.map(|(slice, _rest)| slice)

529

.map(|(slice, _rest)| slice)

524

.ok_or_else(|| DirstateV2ParseError)

530

.ok_or_else(|| DirstateV2ParseError)

525

}

531

}

526

532

527

pub(crate) fn for_each_tracked_path<'on_disk>(

533

pub(crate) fn for_each_tracked_path<'on_disk>(

528

on_disk: &'on_disk [u8],

534

on_disk: &'on_disk [u8],

529

metadata: &[u8],

535

metadata: &[u8],

530

mut f: impl FnMut(&'on_disk HgPath),

536

mut f: impl FnMut(&'on_disk HgPath),

531

) -> Result<(), DirstateV2ParseError> {

537

) -> Result<(), DirstateV2ParseError> {

532

let (meta, _) = TreeMetadata::from_bytes(metadata)

538

let (meta, _) = TreeMetadata::from_bytes(metadata)

533

.map_err(|_| DirstateV2ParseError)?;

539

.map_err(|_| DirstateV2ParseError)?;

534

fn recur<'on_disk>(

540

fn recur<'on_disk>(

535

on_disk: &'on_disk [u8],

541

on_disk: &'on_disk [u8],

536

nodes: ChildNodes,

542

nodes: ChildNodes,

537

f: &mut impl FnMut(&'on_disk HgPath),

543

f: &mut impl FnMut(&'on_disk HgPath),

538

) -> Result<(), DirstateV2ParseError> {

544

) -> Result<(), DirstateV2ParseError> {

539

for node in read_nodes(on_disk, nodes)? {

545

for node in read_nodes(on_disk, nodes)? {

540

if let Some(state) = node.state()? {

546

if let Some(state) = node.state()? {

541

if state.is_tracked() {

547

if state.is_tracked() {

542

f(node.full_path(on_disk)?)

548

f(node.full_path(on_disk)?)

543

}

549

}

544

}

550

}

545

recur(on_disk, node.children, f)?

551

recur(on_disk, node.children, f)?

546

}

552

}

547

Ok(())

553

Ok(())

548

}

554

}

549

recur(on_disk, meta.root_nodes, &mut f)

555

recur(on_disk, meta.root_nodes, &mut f)

550

}

556

}

551

557

552

/// Returns new data and metadata, together with whether that data should be

558

/// Returns new data and metadata, together with whether that data should be

553

/// appended to the existing data file whose content is at

559

/// appended to the existing data file whose content is at

554

/// `dirstate_map.on_disk` (true), instead of written to a new data file

560

/// `dirstate_map.on_disk` (true), instead of written to a new data file

555

/// (false).

561

/// (false).

556

pub(super) fn write(

562

pub(super) fn write(

557

dirstate_map: &mut DirstateMap,

563

dirstate_map: &mut DirstateMap,

558

can_append: bool,

564

can_append: bool,

559

) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {

565

) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {

560

let append = can_append && dirstate_map.write_should_append();

566

let append = can_append && dirstate_map.write_should_append();

561

567

562

// This ignores the space for paths, and for nodes without an entry.

568

// This ignores the space for paths, and for nodes without an entry.

563

// TODO: better estimate? Skip the `Vec` and write to a file directly?

569

// TODO: better estimate? Skip the `Vec` and write to a file directly?

564

let size_guess = std::mem::size_of::<Node>()

570

let size_guess = std::mem::size_of::<Node>()

565

* dirstate_map.nodes_with_entry_count as usize;

571

* dirstate_map.nodes_with_entry_count as usize;

566

572

567

let mut writer = Writer {

573

let mut writer = Writer {

568

dirstate_map,

574

dirstate_map,

569

append,

575

append,

570

out: Vec::with_capacity(size_guess),

576

out: Vec::with_capacity(size_guess),

571

};

577

};

572

578

573

let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

579

let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

574

580

575

let meta = TreeMetadata {

581

let meta = TreeMetadata {

576

root_nodes,

582

root_nodes,

577

nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),

583

nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),

578

nodes_with_copy_source_count: dirstate_map

584

nodes_with_copy_source_count: dirstate_map

579

.nodes_with_copy_source_count

585

.nodes_with_copy_source_count

580

.into(),

586

.into(),

581

unreachable_bytes: dirstate_map.unreachable_bytes.into(),

587

unreachable_bytes: dirstate_map.unreachable_bytes.into(),

588

unused: [0; 4],

582

ignore_patterns_hash: dirstate_map.ignore_patterns_hash,

589

ignore_patterns_hash: dirstate_map.ignore_patterns_hash,

583

};

590

};

584

Ok((writer.out, meta.as_bytes().to_vec(), append))

591

Ok((writer.out, meta.as_bytes().to_vec(), append))

585

}

592

}

586

593

587

struct Writer<'dmap, 'on_disk> {

594

struct Writer<'dmap, 'on_disk> {

588

dirstate_map: &'dmap DirstateMap<'on_disk>,

595

dirstate_map: &'dmap DirstateMap<'on_disk>,

589

append: bool,

596

append: bool,

590

out: Vec<u8>,

597

out: Vec<u8>,

591

}

598

}

592

599

593

impl Writer<'_, '_> {

600

impl Writer<'_, '_> {

594

fn write_nodes(

601

fn write_nodes(

595

&mut self,

602

&mut self,

596

nodes: dirstate_map::ChildNodesRef,

603

nodes: dirstate_map::ChildNodesRef,

597

) -> Result<ChildNodes, DirstateError> {

604

) -> Result<ChildNodes, DirstateError> {

598

// Reuse already-written nodes if possible

605

// Reuse already-written nodes if possible

599

if self.append {

606

if self.append {

600

if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {

607

if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {

601

let start = self.on_disk_offset_of(nodes_slice).expect(

608

let start = self.on_disk_offset_of(nodes_slice).expect(

602

"dirstate-v2 OnDisk nodes not found within on_disk",

609

"dirstate-v2 OnDisk nodes not found within on_disk",

603

);

610

);

604

let len = child_nodes_len_from_usize(nodes_slice.len());

611

let len = child_nodes_len_from_usize(nodes_slice.len());

605

return Ok(ChildNodes { start, len });

612

return Ok(ChildNodes { start, len });

606

}

613

}

607

}

614

}

608

615

609

// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has

616

// `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has

610

// undefined iteration order. Sort to enable binary search in the

617

// undefined iteration order. Sort to enable binary search in the

611

// written file.

618

// written file.

612

let nodes = nodes.sorted();

619

let nodes = nodes.sorted();

613

let nodes_len = nodes.len();

620

let nodes_len = nodes.len();

614

621

615

// First accumulate serialized nodes in a `Vec`

622

// First accumulate serialized nodes in a `Vec`

616

let mut on_disk_nodes = Vec::with_capacity(nodes_len);

623

let mut on_disk_nodes = Vec::with_capacity(nodes_len);

617

for node in nodes {

624

for node in nodes {

618

let children =

625

let children =

619

self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;

626

self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;

620

let full_path = node.full_path(self.dirstate_map.on_disk)?;

627

let full_path = node.full_path(self.dirstate_map.on_disk)?;

621

let full_path = self.write_path(full_path.as_bytes());

628

let full_path = self.write_path(full_path.as_bytes());

622

let copy_source = if let Some(source) =

629

let copy_source = if let Some(source) =

623

node.copy_source(self.dirstate_map.on_disk)?

630

node.copy_source(self.dirstate_map.on_disk)?

624

{

631

{

625

self.write_path(source.as_bytes())

632

self.write_path(source.as_bytes())

626

} else {

633

} else {

627

PathSlice {

634

PathSlice {

628

start: 0.into(),

635

start: 0.into(),

629

len: 0.into(),

636

len: 0.into(),

630

}

637

}

631

};

638

};

632

on_disk_nodes.push(match node {

639

on_disk_nodes.push(match node {

633

NodeRef::InMemory(path, node) => {

640

NodeRef::InMemory(path, node) => {

634

let (state, data) = match &node.data {

641

let (state, data) = match &node.data {

635

dirstate_map::NodeData::Entry(entry) => (

642

dirstate_map::NodeData::Entry(entry) => (

636

entry.state.into(),

643

entry.state.into(),

637

Entry {

644

Entry {

638

mode: entry.mode.into(),

645

mode: entry.mode.into(),

639

mtime: entry.mtime.into(),

646

mtime: entry.mtime.into(),

640

size: entry.size.into(),

647

size: entry.size.into(),

641

},

648

},

642

),

649

),

643

dirstate_map::NodeData::CachedDirectory { mtime } => {

650

dirstate_map::NodeData::CachedDirectory { mtime } => {

644

(b'd', Entry::from_timestamp(*mtime))

651

(b'd', Entry::from_timestamp(*mtime))

645

}

652

}

646

dirstate_map::NodeData::None => (

653

dirstate_map::NodeData::None => (

647

b'\0',

654

b'\0',

648

Entry {

655

Entry {

649

mode: 0.into(),

656

mode: 0.into(),

650

mtime: 0.into(),

657

mtime: 0.into(),

651

size: 0.into(),

658

size: 0.into(),

652

},

659

},

653

),

660

),

654

};

661

};

655

Node {

662

Node {

656

children,

663

children,

657

copy_source,

664

copy_source,

658

full_path,

665

full_path,

659

base_name_start: u16::try_from(path.base_name_start())

666

base_name_start: u16::try_from(path.base_name_start())

660

// Could only panic for paths over 64 KiB

667

// Could only panic for paths over 64 KiB

661

.expect("dirstate-v2 path length overflow")

668

.expect("dirstate-v2 path length overflow")

662

.into(),

669

.into(),

663

descendants_with_entry_count: node

670

descendants_with_entry_count: node

664

.descendants_with_entry_count

671

.descendants_with_entry_count

665

.into(),

672

.into(),

666

tracked_descendants_count: node

673

tracked_descendants_count: node

667

.tracked_descendants_count

674

.tracked_descendants_count

668

.into(),

675

.into(),

669

state,

676

state,

670

data,

677

data,

671

}

678

}

672

}

679

}

673

NodeRef::OnDisk(node) => Node {

680

NodeRef::OnDisk(node) => Node {

674

children,

681

children,

675

copy_source,

682

copy_source,

676

full_path,

683

full_path,

677

..*node

684

..*node

678

},

685

},

679

})

686

})

680

}

687

}

681

// … so we can write them contiguously, after writing everything else

688

// … so we can write them contiguously, after writing everything else

682

// they refer to.

689

// they refer to.

683

let start = self.current_offset();

690

let start = self.current_offset();

684

let len = child_nodes_len_from_usize(nodes_len);

691

let len = child_nodes_len_from_usize(nodes_len);

685

self.out.extend(on_disk_nodes.as_bytes());

692

self.out.extend(on_disk_nodes.as_bytes());

686

Ok(ChildNodes { start, len })

693

Ok(ChildNodes { start, len })

687

}

694

}

688

695

689

/// If the given slice of items is within `on_disk`, returns its offset

696

/// If the given slice of items is within `on_disk`, returns its offset

690

/// from the start of `on_disk`.

697

/// from the start of `on_disk`.

691

fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>

698

fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>

692

where

699

where

693

T: BytesCast,

700

T: BytesCast,

694

{

701

{

695

fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {

702

fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {

696

let start = slice.as_ptr() as usize;

703

let start = slice.as_ptr() as usize;

697

let end = start + slice.len();

704

let end = start + slice.len();

698

start..=end

705

start..=end

699

}

706

}

700

let slice_addresses = address_range(slice.as_bytes());

707

let slice_addresses = address_range(slice.as_bytes());

701

let on_disk_addresses = address_range(self.dirstate_map.on_disk);

708

let on_disk_addresses = address_range(self.dirstate_map.on_disk);

702

if on_disk_addresses.contains(slice_addresses.start())

709

if on_disk_addresses.contains(slice_addresses.start())

703

&& on_disk_addresses.contains(slice_addresses.end())

710

&& on_disk_addresses.contains(slice_addresses.end())

704

{

711

{

705

let offset = slice_addresses.start() - on_disk_addresses.start();

712

let offset = slice_addresses.start() - on_disk_addresses.start();

706

Some(offset_from_usize(offset))

713

Some(offset_from_usize(offset))

707

} else {

714

} else {

708

None

715

None

709

}

716

}

710

}

717

}

711

718

712

fn current_offset(&mut self) -> Offset {

719

fn current_offset(&mut self) -> Offset {

713

let mut offset = self.out.len();

720

let mut offset = self.out.len();

714

if self.append {

721

if self.append {

715

offset += self.dirstate_map.on_disk.len()

722

offset += self.dirstate_map.on_disk.len()

716

}

723

}

717

offset_from_usize(offset)

724

offset_from_usize(offset)

718

}

725

}

719

726

720

fn write_path(&mut self, slice: &[u8]) -> PathSlice {

727

fn write_path(&mut self, slice: &[u8]) -> PathSlice {

721

let len = path_len_from_usize(slice.len());

728

let len = path_len_from_usize(slice.len());

722

// Reuse an already-written path if possible

729

// Reuse an already-written path if possible

723

if self.append {

730

if self.append {

724

if let Some(start) = self.on_disk_offset_of(slice) {

731

if let Some(start) = self.on_disk_offset_of(slice) {

725

return PathSlice { start, len };

732

return PathSlice { start, len };

726

}

733

}

727

}

734

}

728

let start = self.current_offset();

735

let start = self.current_offset();

729

self.out.extend(slice.as_bytes());

736

self.out.extend(slice.as_bytes());

730

PathSlice { start, len }

737

PathSlice { start, len }

731

}

738

}

732

}

739

}

733

740

734

fn offset_from_usize(x: usize) -> Offset {

741

fn offset_from_usize(x: usize) -> Offset {

735

u32::try_from(x)

742

u32::try_from(x)

736

// Could only panic for a dirstate file larger than 4 GiB

743

// Could only panic for a dirstate file larger than 4 GiB

737

.expect("dirstate-v2 offset overflow")

744

.expect("dirstate-v2 offset overflow")

738

.into()

745

.into()

739

}

746

}

740

747

741

fn child_nodes_len_from_usize(x: usize) -> Size {

748

fn child_nodes_len_from_usize(x: usize) -> Size {

742

u32::try_from(x)

749

u32::try_from(x)

743

// Could only panic with over 4 billion nodes

750

// Could only panic with over 4 billion nodes

744

.expect("dirstate-v2 slice length overflow")

751

.expect("dirstate-v2 slice length overflow")

745

.into()

752

.into()

746

}

753

}

747

754

748

fn path_len_from_usize(x: usize) -> PathSize {

755

fn path_len_from_usize(x: usize) -> PathSize {

749

u16::try_from(x)

756

u16::try_from(x)

750

// Could only panic for paths over 64 KiB

757

// Could only panic for paths over 64 KiB

751

.expect("dirstate-v2 path length overflow")

758

.expect("dirstate-v2 path length overflow")

752

.into()

759

.into()

753

}

760

}

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # dirstatedocket.py - docket file for dirstate-v2
             #
             # Copyright Mercurial Contributors
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import struct
             from ..revlogutils import docket as docket_mod
             V2_FORMAT_MARKER = b"dirstate-v2\n"
             # Must match the constant of the same name in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`
-            TREE_METADATA_SIZE = 40
+            TREE_METADATA_SIZE = 44
             # * 12 bytes: format marker
             # * 32 bytes: node ID of the working directory's first parent
             # * 32 bytes: node ID of the working directory's second parent
             # * 4 bytes: big-endian used size of the data file
             # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
             # * 1 byte: length of the data file's UUID
             # * variable: data file's UUID
             #
             # Node IDs are null-padded if shorter than 32 bytes.
             # A data file shorter than the specified used size is corrupted (truncated)
             HEADER = struct.Struct(
                 ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
             )
             class DirstateDocket(object):
                 """In-memory form of the dirstate-v2 docket.

                 Holds the working directory parents, the used size of the data file,
                 the opaque tree metadata and the UUID naming the data file.
                 """

                 data_filename_pattern = b'dirstate.%s.d'

                 def __init__(self, parents, data_size, tree_metadata, uuid):
                     self.uuid = uuid
                     self.tree_metadata = tree_metadata
                     self.data_size = data_size
                     self.parents = parents

                 @classmethod
                 def with_new_uuid(cls, parents, data_size, tree_metadata):
                     """Build a docket whose data file gets a freshly made UUID."""
                     fresh_uuid = docket_mod.make_uid()
                     return cls(parents, data_size, tree_metadata, fresh_uuid)

                 @classmethod
                 def parse(cls, data, nodeconstants):
                     """Decode the docket bytes `data`.

                     Empty input yields a docket with null parents, a zero data size,
                     empty tree metadata and no UUID. Raises `ValueError` when the
                     format marker is not the dirstate-v2 one.
                     """
                     if not data:
                         null = nodeconstants.nullid
                         return cls((null, null), 0, b'', None)
                     fields = HEADER.unpack_from(data)
                     marker, p1, p2, data_size, meta, uuid_size = fields
                     if marker != V2_FORMAT_MARKER:
                         raise ValueError("expected dirstate-v2 marker")
                     # The UUID immediately follows the fixed-size header
                     uuid_start = HEADER.size
                     uuid = data[uuid_start : uuid_start + uuid_size]
                     # Stored node IDs are padded; keep only the meaningful prefix
                     node_len = nodeconstants.nodelen
                     return cls((p1[:node_len], p2[:node_len]), data_size, meta, uuid)

                 def serialize(self):
                     """Encode this docket as the fixed header followed by the UUID."""
                     first_parent, second_parent = self.parents
                     packed = HEADER.pack(
                         V2_FORMAT_MARKER,
                         first_parent,
                         second_parent,
                         self.data_size,
                         self.tree_metadata,
                         len(self.uuid),
                     )
                     return packed + self.uuid

                 def data_filename(self):
                     """Return the name of the data file this docket points to."""
                     return self.data_filename_pattern % self.uuid

             //! The "version 2" disk representation of the dirstate
             //!
             //! # File format
             //!
             //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
             //! with a fixed-sized header whose layout is defined by the `DocketHeader`
             //! struct, followed by the data file identifier.
             //!
             //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
             //! file may be longer than the size given in the docket, but not shorter. Only
             //! the start of the data file up to the given size is considered. The
             //! fixed-size "root" of the dirstate tree whose layout is defined by the
             //! `Root` struct is found at the end of that slice of data.
             //!
             //! Its `root_nodes` field contains the slice (offset and length) to
             //! the nodes representing the files and directories at the root of the
             //! repository. Each node is also fixed-size, defined by the `Node` struct.
             //! Nodes in turn contain slices to variable-size paths, and to their own child
             //! nodes (if any) for nested files and directories.
             use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
             use crate::dirstate_tree::path_with_basename::WithBasename;
             use crate::errors::HgError;
             use crate::utils::hg_path::HgPath;
             use crate::DirstateEntry;
             use crate::DirstateError;
             use crate::DirstateParents;
             use crate::EntryState;
             use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
             use bytes_cast::BytesCast;
             use format_bytes::format_bytes;
             use std::borrow::Cow;
             use std::convert::{TryFrom, TryInto};
             use std::time::{Duration, SystemTime, UNIX_EPOCH};
             /// Added at the start of `.hg/dirstate` when the "v2" format is used.
             /// This is a redundant sanity check more than an actual "magic number" since
             /// `.hg/requires` already governs which format should be used.
             pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
             /// Keep space for 256-bit hashes
             const STORED_NODE_ID_BYTES: usize = 32;
             /// … even though only 160 bits are used for now, with SHA-1
             const USED_NODE_ID_BYTES: usize = 20;
             pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
             pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
             /// Must match the constant of the same name in
             /// `mercurial/dirstateutils/docket.py`
-            const TREE_METADATA_SIZE: usize = 40;
+            const TREE_METADATA_SIZE: usize = 44;
             /// Make sure that size-affecting changes are made knowingly
             #[allow(unused)]
             fn static_assert_size_of() {
-                let _ = std::mem::transmute::<DocketHeader, [u8; 121]>;
                 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
+                let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
                 let _ = std::mem::transmute::<Node, [u8; 43]>;
             }
             // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
             /// Fixed-size header found at the start of the `.hg/dirstate` docket file.
             ///
             /// The data file identifier (UUID) follows this header in the docket.
             #[derive(BytesCast)]
             #[repr(C)]
             struct DocketHeader {
                 /// Expected to be equal to `V2_FORMAT_MARKER`
                 marker: [u8; V2_FORMAT_MARKER.len()],
                 /// Node ID of the first parent. Only the first `USED_NODE_ID_BYTES`
                 /// bytes are read by `Docket::parents`.
                 parent_1: [u8; STORED_NODE_ID_BYTES],
                 /// Node ID of the second parent, stored like `parent_1`
                 parent_2: [u8; STORED_NODE_ID_BYTES],
                 /// Counted in bytes
                 data_size: Size,
                 /// Tree metadata, exposed as raw bytes by `Docket::tree_metadata`
                 metadata: TreeMetadata,
                 /// Length in bytes of the UUID that follows this header
                 uuid_size: u8,
             }
             /// A parsed docket, borrowing from the bytes given to `read_docket`.
             pub struct Docket<'on_disk> {
                 /// The fixed-size header at the start of the docket
                 header: &'on_disk DocketHeader,
                 /// The bytes following the header, used to name the data file
                 uuid: &'on_disk [u8],
             }
             #[derive(BytesCast)]
             #[repr(C)]
             struct TreeMetadata {
                 root_nodes: ChildNodes,
                 nodes_with_entry_count: Size,
                 nodes_with_copy_source_count: Size,
                 /// How many bytes of this data file are not used anymore
                 unreachable_bytes: Size,
+                /// Current version always sets these bytes to zero when creating or
+                /// updating a dirstate. Future versions could assign some bits to signal
+                /// for example "the version that last wrote/updated this dirstate did so
+                /// in such and such way that can be relied on by versions that know to."
+                unused: [u8; 4],
                 /// If non-zero, a hash of ignore files that were used for some previous
                 /// run of the `status` algorithm.
                 ///
                 /// We define:
                 ///
                 /// * "Root" ignore files are `.hgignore` at the root of the repository if
                 ///   it exists, and files from `ui.ignore.*` config. This set of files is
                 ///   then sorted by the string representation of their path.
                 /// * The "expanded contents" of an ignore file is the byte string made
                 ///   by concatenating its contents with the "expanded contents" of other
                 ///   files included with `include:` or `subinclude:` files, in inclusion
                 ///   order. This definition is recursive, as included files can
                 ///   themselves include more files.
                 ///
                 /// This hash is defined as the SHA-1 of the concatenation (in sorted
                 /// order) of the "expanded contents" of each "root" ignore file.
                 /// (Note that computing this does not require actually concatenating byte
                 /// strings into contiguous memory, instead SHA-1 hashing can be done
                 /// incrementally.)
                 ignore_patterns_hash: IgnorePatternsHash,
             }
             #[derive(BytesCast)]
             #[repr(C)]
             pub(super) struct Node {
                 full_path: PathSlice,
                 /// In bytes from `self.full_path.start`
                 base_name_start: PathSize,
                 copy_source: OptPathSlice,
                 children: ChildNodes,
                 pub(super) descendants_with_entry_count: Size,
                 pub(super) tracked_descendants_count: Size,
                 /// Depending on the value of `state`:
                 ///
                 /// * A null byte: `data` is not used.
                 ///
                 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
                 ///   represent a dirstate entry like in the v1 format.
                 ///
                 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
                 ///   as the `Timestamp` for the mtime of a cached directory.
                 ///
                 ///   The presence of this state means that at some point, this path in
                 ///   the working directory was observed:
                 ///
                 ///   - To be a directory
                 ///   - With the modification time as given by `Timestamp`
                 ///   - That timestamp was already strictly in the past when observed,
                 ///     meaning that later changes cannot happen in the same clock tick
                 ///     and must cause a different modification time (unless the system
                 ///     clock jumps back and we get unlucky, which is not impossible
                 ///     but deemed unlikely enough).
                 ///   - All direct children of this directory (as returned by
                 ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
                 ///     are ignored by ignore patterns whose hash is in
                 ///     `TreeMetadata::ignore_patterns_hash`.
                 ///
                 ///   This means that if `std::fs::symlink_metadata` later reports the
                 ///   same modification time and ignored patterns haven’t changed, a run
                 ///   of status that is not listing ignored files can skip calling
                 ///   `std::fs::read_dir` again for this directory, and iterate child
                 ///   dirstate nodes instead.
                 state: u8,
                 data: Entry,
             }
             /// The `data` payload of a `Node`.
             ///
             /// Read as the fields of a dirstate entry when the node's `state` denotes
             /// one, or reinterpreted as a `Timestamp` (see `Entry::as_timestamp`) when
             /// `state` is `d`.
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct Entry {
                 /// Copied to `DirstateEntry::mode`
                 mode: I32Be,
                 /// Copied to `DirstateEntry::mtime`
                 mtime: I32Be,
                 /// Copied to `DirstateEntry::size`
                 size: I32Be,
             }
             /// Duration since the Unix epoch
             #[derive(BytesCast, Copy, Clone, PartialEq)]
             #[repr(C)]
             pub(super) struct Timestamp {
                 seconds: I64Be,
                 /// In `0 .. 1_000_000_000`.
                 ///
                 /// This timestamp is later or earlier than `(seconds, 0)` by this many
                 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
                 nanoseconds: U32Be,
             }
             /// Counted in bytes from the start of the file
             ///
             /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
             type Offset = U32Be;
             /// Counted in number of items
             ///
             /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
             type Size = U32Be;
             /// Counted in bytes
             ///
             /// NOTE: we choose not to support file names/paths longer than 64 KiB.
             type PathSize = U16Be;
             /// A contiguous sequence of `len` times `Node`, representing the child nodes
             /// of either some other node or of the repository root.
             ///
             /// Always sorted by ascending `full_path`, to allow binary search.
             /// Since nodes with the same parent nodes also have the same parent path,
             /// only the `base_name`s need to be compared during binary search.
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct ChildNodes {
                 start: Offset,
                 len: Size,
             }
             /// A `HgPath` of `len` bytes
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct PathSlice {
                 start: Offset,
                 len: PathSize,
             }
             /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
             type OptPathSlice = PathSlice;
             /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
             ///
             /// This should only happen if Mercurial is buggy or a repository is corrupted.
             #[derive(Debug)]
             pub struct DirstateV2ParseError;
             impl From<DirstateV2ParseError> for HgError {
                 fn from(_: DirstateV2ParseError) -> Self {
                     HgError::corrupted("dirstate-v2 parse error")
                 }
             }
             impl From<DirstateV2ParseError> for crate::DirstateError {
                 fn from(error: DirstateV2ParseError) -> Self {
                     HgError::from(error).into()
                 }
             }
             impl<'on_disk> Docket<'on_disk> {
                 pub fn parents(&self) -> DirstateParents {
                     use crate::Node;
                     let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
                         .unwrap()
                         .clone();
                     let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
                         .unwrap()
                         .clone();
                     DirstateParents { p1, p2 }
                 }
                 pub fn tree_metadata(&self) -> &[u8] {
                     self.header.metadata.as_bytes()
                 }
                 pub fn data_size(&self) -> usize {
                     // This `unwrap` could only panic on a 16-bit CPU
                     self.header.data_size.get().try_into().unwrap()
                 }
                 pub fn data_filename(&self) -> String {
                     String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
                 }
             }
             /// Parses the docket found in `on_disk`.
             ///
             /// Fails if the bytes are too short for a `DocketHeader`, if the format
             /// marker is not `V2_FORMAT_MARKER`, or if the number of bytes following the
             /// header differs from the UUID size recorded in the header.
             pub fn read_docket(
                 on_disk: &[u8],
             ) -> Result<Docket<'_>, DirstateV2ParseError> {
                 let (header, uuid) = match DocketHeader::from_bytes(on_disk) {
                     Ok(parts) => parts,
                     Err(_) => return Err(DirstateV2ParseError),
                 };
                 let marker_matches = header.marker == *V2_FORMAT_MARKER;
                 let uuid_len_matches = uuid.len() == usize::from(header.uuid_size);
                 if !(marker_matches && uuid_len_matches) {
                     return Err(DirstateV2ParseError);
                 }
                 Ok(Docket { header, uuid })
             }
             /// Builds a `DirstateMap` borrowing from the data file contents `on_disk`,
             /// using the tree metadata bytes `metadata` taken from the docket.
             ///
             /// Empty `on_disk` data gives an empty map without looking at `metadata`.
             pub(super) fn read<'on_disk>(
                 on_disk: &'on_disk [u8],
                 metadata: &[u8],
             ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
                 if on_disk.is_empty() {
                     return Ok(DirstateMap::empty(on_disk));
                 }
                 let meta = match TreeMetadata::from_bytes(metadata) {
                     Ok((meta, _rest)) => meta,
                     Err(_) => return Err(DirstateV2ParseError),
                 };
                 // Only the root nodes are located here; deeper nodes stay on disk
                 let root_nodes = read_nodes(on_disk, meta.root_nodes)?;
                 Ok(DirstateMap {
                     on_disk,
                     root: dirstate_map::ChildNodes::OnDisk(root_nodes),
                     nodes_with_entry_count: meta.nodes_with_entry_count.get(),
                     nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
                     ignore_patterns_hash: meta.ignore_patterns_hash,
                     unreachable_bytes: meta.unreachable_bytes.get(),
                 })
             }
             impl Node {
                 /// Reads the full path of this node out of `on_disk`.
                 ///
                 /// Fails if `self.full_path` does not designate bytes inside `on_disk`.
                 pub(super) fn full_path<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                     read_hg_path(on_disk, self.full_path)
                 }
                 /// Returns the offset, within the full path, where the base name starts.
                 ///
                 /// Fails unless that offset is strictly less than the full path length.
                 pub(super) fn base_name_start<'on_disk>(
                     &self,
                 ) -> Result<usize, DirstateV2ParseError> {
                     let start = self.base_name_start.get();
                     if start < self.full_path.len.get() {
                         // `PathSize` is 16 bits wide: widening to `usize` cannot fail
                         Ok(usize::from(start))
                     } else {
                         Err(DirstateV2ParseError)
                     }
                 }
                 /// Reads the base name of this node: the part of the full path starting
                 /// at `base_name_start`.
                 pub(super) fn base_name<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                     let full_path = self.full_path(on_disk)?;
                     let base_name_start = self.base_name_start()?;
                     Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
                 }
                 /// Returns the in-memory key for this node: its borrowed full path
                 /// together with the position of its base name.
                 pub(super) fn path<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
                     Ok(WithBasename::from_raw_parts(
                         Cow::Borrowed(self.full_path(on_disk)?),
                         self.base_name_start()?,
                     ))
                 }
                 /// Whether this node has a copy source: `start == 0` encodes "nothing".
                 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
                     self.copy_source.start.get() != 0
                 }
                 /// Reads the copy source path of this node out of `on_disk`, if any.
                 pub(super) fn copy_source<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
                     Ok(if self.has_copy_source() {
                         Some(read_hg_path(on_disk, self.copy_source)?)
                     } else {
                         None
                     })
                 }
                 /// Decodes `state` and `data` into the in-memory node data.
                 ///
                 /// Fails if `state` is not one of the known bytes.
                 pub(super) fn node_data(
                     &self,
                 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
                     let entry = |state| {
                         dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
                     };
                     match self.state {
                         b'\0' => Ok(dirstate_map::NodeData::None),
                         b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
                             mtime: *self.data.as_timestamp(),
                         }),
                         b'n' => Ok(entry(EntryState::Normal)),
                         b'a' => Ok(entry(EntryState::Added)),
                         b'r' => Ok(entry(EntryState::Removed)),
                         b'm' => Ok(entry(EntryState::Merged)),
                         _ => Err(DirstateV2ParseError),
                     }
                 }
                 /// Returns the cached directory mtime, only when `state` is `d`.
                 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
                     if self.state == b'd' {
                         Some(self.data.as_timestamp())
                     } else {
                         None
                     }
                 }
                 /// Decodes the entry state of this node.
                 ///
                 /// Gives `None` for nodes without an entry (null or `d` state byte), and
                 /// fails if `state` is not one of the known bytes.
                 pub(super) fn state(
                     &self,
                 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
                     match self.state {
                         b'\0' | b'd' => Ok(None),
                         b'n' => Ok(Some(EntryState::Normal)),
                         b'a' => Ok(Some(EntryState::Added)),
                         b'r' => Ok(Some(EntryState::Removed)),
                         b'm' => Ok(Some(EntryState::Merged)),
                         _ => Err(DirstateV2ParseError),
                     }
                 }
                 /// Builds a `DirstateEntry` from `data`, with an already-decoded state.
                 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
                     DirstateEntry {
                         state,
                         mode: self.data.mode.get(),
                         mtime: self.data.mtime.get(),
                         size: self.data.size.get(),
                     }
                 }
                 /// Returns the dirstate entry of this node, if its state denotes one.
                 pub(super) fn entry(
                     &self,
                 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
                     Ok(self
                         .state()?
                         .map(|state| self.entry_with_given_state(state)))
                 }
                 /// Locates the child nodes of this node inside `on_disk`.
                 pub(super) fn children<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
                     read_nodes(on_disk, self.children)
                 }
                 /// Converts this node to its in-memory representation.
                 ///
                 /// Children and the copy source keep borrowing from `on_disk`.
                 pub(super) fn to_in_memory_node<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
                     Ok(dirstate_map::Node {
                         children: dirstate_map::ChildNodes::OnDisk(
                             self.children(on_disk)?,
                         ),
                         copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
                         data: self.node_data()?,
                         descendants_with_entry_count: self
                             .descendants_with_entry_count
                             .get(),
                         tracked_descendants_count: self.tracked_descendants_count.get(),
                     })
                 }
             }
             impl Entry {
                 /// Stores the bytes of `timestamp` in an `Entry`, for nodes whose state
                 /// is `d` (cached directory).
                 fn from_timestamp(timestamp: Timestamp) -> Self {
                     // SAFETY: both types implement the `BytesCast` trait, so we could
                     // safely use `as_bytes` and `from_bytes` to do this conversion. Using
                     // `transmute` instead makes the compiler check that the two types
                     // have the same size, which eliminates the error case of
                     // `from_bytes`.
                     unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
                 }
                 /// Views the bytes of this `Entry` as a `Timestamp`, without copying.
                 fn as_timestamp(&self) -> &Timestamp {
                     // SAFETY: same as above in `from_timestamp`. This pointer cast is
                     // not size-checked by itself: it relies on the `transmute` in
                     // `from_timestamp` failing to compile if the sizes ever differ.
                     // Both types are `#[repr(C)]` structs made only of
                     // `bytes_cast::unaligned` integers.
                     unsafe { &*(self as *const Entry as *const Timestamp) }
                 }
             }
             impl Timestamp {
                 /// The `seconds` field, decoded from its big-endian representation
                 pub fn seconds(&self) -> i64 {
                     self.seconds.get()
                 }
             }
             /// Encodes a `SystemTime` as whole seconds relative to the Unix epoch
             /// (negated for times before the epoch) plus sub-second nanoseconds.
             impl From<SystemTime> for Timestamp {
                 fn from(system_time: SystemTime) -> Self {
                     let (seconds, nanoseconds) = system_time
                         .duration_since(UNIX_EPOCH)
                         .map(|after| (after.as_secs() as i64, after.subsec_nanos()))
                         .unwrap_or_else(|error| {
                             // `system_time` is before the epoch: `error` carries how
                             // far before
                             let before = error.duration();
                             (-(before.as_secs() as i64), before.subsec_nanos())
                         });
                     Timestamp {
                         seconds: seconds.into(),
                         nanoseconds: nanoseconds.into(),
                     }
                 }
             }
             /// Decodes a `Timestamp` by moving away from the Unix epoch by `seconds`
             /// and `nanoseconds`, backwards when `seconds` is negative.
             impl From<&'_ Timestamp> for SystemTime {
                 fn from(timestamp: &'_ Timestamp) -> Self {
                     let seconds = timestamp.seconds.get();
                     let offset_nanos = timestamp.nanoseconds.get();
                     if seconds < 0 {
                         UNIX_EPOCH - Duration::new((-seconds) as u64, offset_nanos)
                     } else {
                         UNIX_EPOCH + Duration::new(seconds as u64, offset_nanos)
                     }
                 }
             }
             fn read_hg_path(
                 on_disk: &[u8],
                 slice: PathSlice,
             ) -> Result<&HgPath, DirstateV2ParseError> {
                 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
             }
             fn read_nodes(
                 on_disk: &[u8],
                 slice: ChildNodes,
             ) -> Result<&[Node], DirstateV2ParseError> {
                 read_slice(on_disk, slice.start, slice.len.get())
             }
             fn read_slice<T, Len>(
                 on_disk: &[u8],
                 start: Offset,
                 len: Len,
             ) -> Result<&[T], DirstateV2ParseError>
             where
                 T: BytesCast,
                 Len: TryInto<usize>,
             {
                 // Either `usize::MAX` would result in "out of bounds" error since a single
                 // `&[u8]` cannot occupy the entire addess space.
                 let start = start.get().try_into().unwrap_or(std::usize::MAX);
                 let len = len.try_into().unwrap_or(std::usize::MAX);
                 on_disk
                     .get(start..)
                     .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
                     .map(|(slice, _rest)| slice)
                     .ok_or_else(|| DirstateV2ParseError)
             }
             /// Calls `f` with the full path of every node of the tree whose entry
             /// state is tracked.
             ///
             /// Each node is visited before its children. The walk stops at the first
             /// parse error.
             pub(crate) fn for_each_tracked_path<'on_disk>(
                 on_disk: &'on_disk [u8],
                 metadata: &[u8],
                 mut f: impl FnMut(&'on_disk HgPath),
             ) -> Result<(), DirstateV2ParseError> {
                 /// Visits `nodes`, then recursively each of their children
                 fn visit<'on_disk>(
                     on_disk: &'on_disk [u8],
                     nodes: ChildNodes,
                     f: &mut impl FnMut(&'on_disk HgPath),
                 ) -> Result<(), DirstateV2ParseError> {
                     for node in read_nodes(on_disk, nodes)? {
                         let is_tracked = node
                             .state()?
                             .map_or(false, |state| state.is_tracked());
                         if is_tracked {
                             f(node.full_path(on_disk)?);
                         }
                         visit(on_disk, node.children, f)?;
                     }
                     Ok(())
                 }
                 let meta = match TreeMetadata::from_bytes(metadata) {
                     Ok((meta, _rest)) => meta,
                     Err(_) => return Err(DirstateV2ParseError),
                 };
                 visit(on_disk, meta.root_nodes, &mut f)
             }
             /// Returns new data and metadata, together with whether that data should be
             /// appended to the existing data file whose content is at
             /// `dirstate_map.on_disk` (true), instead of written to a new data file
             /// (false).
             ///
             /// The returned tuple is, in order: the serialized data, the serialized
             /// `TreeMetadata`, and the append flag. Appending is only chosen when
             /// `can_append` is true and `dirstate_map.write_should_append()` agrees.
             pub(super) fn write(
                 dirstate_map: &mut DirstateMap,
                 can_append: bool,
             ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
                 let append = can_append && dirstate_map.write_should_append();
                 // This ignores the space for paths, and for nodes without an entry.
                 // TODO: better estimate? Skip the `Vec` and write to a file directly?
                 let size_guess = std::mem::size_of::<Node>()
                     * dirstate_map.nodes_with_entry_count as usize;
                 let mut writer = Writer {
                     dirstate_map,
                     append,
                     out: Vec::with_capacity(size_guess),
                 };
                 // Serializes the whole tree into `writer.out`, starting from the root
                 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
                 let meta = TreeMetadata {
                     root_nodes,
                     nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
                     nodes_with_copy_source_count: dirstate_map
                         .nodes_with_copy_source_count
                         .into(),
                     unreachable_bytes: dirstate_map.unreachable_bytes.into(),
+                    unused: [0; 4],
                     ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
                 };
                 Ok((writer.out, meta.as_bytes().to_vec(), append))
             }
             /// State shared by the steps of serializing a `DirstateMap`, see `write`.
             struct Writer<'dmap, 'on_disk> {
                 /// The map being serialized
                 dirstate_map: &'dmap DirstateMap<'on_disk>,
                 /// Whether nodes and paths already present in
                 /// `dirstate_map.on_disk` are reused instead of written again
                 append: bool,
                 /// The serialized bytes accumulated so far
                 out: Vec<u8>,
             }
             impl Writer<'_, '_> {
                 /// Serializes `nodes` and everything they refer to, returning the
                 /// slice (start offset and node count) where they are stored.
                 ///
                 /// For each node, its children, full path and copy source are
                 /// emitted first; the fixed-size nodes are then written contiguously.
                 /// In append mode a `ChildNodesRef::OnDisk` slice is not written
                 /// again: its existing position within `on_disk` is returned.
                 ///
                 /// # Errors
                 ///
                 /// Propagates any error from reading a node's children, full path or
                 /// copy source.
                 ///
                 /// # Panics
                 ///
                 /// Panics if, in append mode, on-disk nodes are not located inside
                 /// `on_disk`, or if an offset, node count or path length does not
                 /// fit its on-disk integer type.
                 fn write_nodes(
                     &mut self,
                     nodes: dirstate_map::ChildNodesRef,
                 ) -> Result<ChildNodes, DirstateError> {
                     // Reuse already-written nodes if possible
                     if self.append {
                         if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                             let start = self.on_disk_offset_of(nodes_slice).expect(
                                 "dirstate-v2 OnDisk nodes not found within on_disk",
                             );
                             let len = child_nodes_len_from_usize(nodes_slice.len());
                             return Ok(ChildNodes { start, len });
                         }
                     }
                     // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
                     // undefined iteration order. Sort to enable binary search in the
                     // written file.
                     let nodes = nodes.sorted();
                     let nodes_len = nodes.len();
                     // First accumulate serialized nodes in a `Vec`
                     let mut on_disk_nodes = Vec::with_capacity(nodes_len);
                     for node in nodes {
                         // Everything a node points to is written before the node
                         // itself, so the slices stored in it are already known.
                         let children =
                             self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
                         let full_path = node.full_path(self.dirstate_map.on_disk)?;
                         let full_path = self.write_path(full_path.as_bytes());
                         let copy_source = if let Some(source) =
                             node.copy_source(self.dirstate_map.on_disk)?
                         {
                             self.write_path(source.as_bytes())
                         } else {
                             // No copy source: store an all-zero slice
                             PathSlice {
                                 start: 0.into(),
                                 len: 0.into(),
                             }
                         };
                         on_disk_nodes.push(match node {
                             NodeRef::InMemory(path, node) => {
                                 let (state, data) = match &node.data {
                                     dirstate_map::NodeData::Entry(entry) => (
                                         entry.state.into(),
                                         Entry {
                                             mode: entry.mode.into(),
                                             mtime: entry.mtime.into(),
                                             size: entry.size.into(),
                                         },
                                     ),
                                     dirstate_map::NodeData::CachedDirectory { mtime } => {
                                         (b'd', Entry::from_timestamp(*mtime))
                                     }
                                     dirstate_map::NodeData::None => (
                                         b'\0',
                                         Entry {
                                             mode: 0.into(),
                                             mtime: 0.into(),
                                             size: 0.into(),
                                         },
                                     ),
                                 };
                                 Node {
                                     children,
                                     copy_source,
                                     full_path,
                                     base_name_start: u16::try_from(path.base_name_start())
                                         // Could only panic for paths over 64 KiB
                                         .expect("dirstate-v2 path length overflow")
                                         .into(),
                                     descendants_with_entry_count: node
                                         .descendants_with_entry_count
                                         .into(),
                                     tracked_descendants_count: node
                                         .tracked_descendants_count
                                         .into(),
                                     state,
                                     data,
                                 }
                             }
                             // An already-serialized node keeps all of its other
                             // fields; only the three slices are replaced.
                             NodeRef::OnDisk(node) => Node {
                                 children,
                                 copy_source,
                                 full_path,
                                 ..*node
                             },
                         })
                     }
                     // … so we can write them contiguously, after writing everything else
                     // they refer to.
                     let start = self.current_offset();
                     let len = child_nodes_len_from_usize(nodes_len);
                     self.out.extend_from_slice(on_disk_nodes.as_bytes());
                     Ok(ChildNodes { start, len })
                 }

                 /// If the given slice of items is within `on_disk`, returns its offset
                 /// from the start of `on_disk`.
                 ///
                 /// The test is done purely on memory addresses: both the first byte
                 /// address and the one-past-the-end address of `slice` must fall
                 /// within the address range of `on_disk`.
                 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
                 where
                     T: BytesCast,
                 {
                     /// Addresses spanned by `slice`, from its first byte up to and
                     /// including the one-past-the-end address. Including that last
                     /// address lets a slice that ends exactly where `on_disk` ends
                     /// pass the containment test.
                     fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
                         let start = slice.as_ptr() as usize;
                         let end = start + slice.len();
                         start..=end
                     }
                     let slice_addresses = address_range(slice.as_bytes());
                     let on_disk_addresses = address_range(self.dirstate_map.on_disk);
                     if on_disk_addresses.contains(slice_addresses.start())
                         && on_disk_addresses.contains(slice_addresses.end())
                     {
                         let offset = slice_addresses.start() - on_disk_addresses.start();
                         Some(offset_from_usize(offset))
                     } else {
                         None
                     }
                 }

                 /// Returns the offset at which the next byte pushed to `out` will be
                 /// stored: `out.len()`, plus the length of `on_disk` in append mode.
                 ///
                 /// Only reads from `self`, hence the shared borrow.
                 ///
                 /// # Panics
                 ///
                 /// Panics if the resulting offset does not fit in a `u32`.
                 fn current_offset(&self) -> Offset {
                     let mut offset = self.out.len();
                     if self.append {
                         offset += self.dirstate_map.on_disk.len()
                     }
                     offset_from_usize(offset)
                 }

                 /// Stores the bytes of a path and returns the slice (start offset
                 /// and length) designating them.
                 ///
                 /// In append mode, bytes that already live inside `on_disk` are not
                 /// copied: the returned slice points at their existing position.
                 ///
                 /// # Panics
                 ///
                 /// Panics if `slice` is longer than a `u16` can express, or if the
                 /// start offset does not fit in a `u32`.
                 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
                     let len = path_len_from_usize(slice.len());
                     // Reuse an already-written path if possible
                     if self.append {
                         if let Some(start) = self.on_disk_offset_of(slice) {
                             return PathSlice { start, len };
                         }
                     }
                     let start = self.current_offset();
                     // `slice` is already raw bytes, append it as-is
                     self.out.extend_from_slice(slice);
                     PathSlice { start, len }
                 }
             }
             /// Converts a byte position into the on-disk `Offset` representation.
             ///
             /// # Panics
             ///
             /// Panics if `x` does not fit in a `u32`.
             fn offset_from_usize(x: usize) -> Offset {
                 let narrowed: u32 = u32::try_from(x)
                     // Could only panic for a dirstate file larger than 4 GiB
                     .expect("dirstate-v2 offset overflow");
                 narrowed.into()
             }
             /// Converts a number of child nodes into the on-disk `Size`
             /// representation.
             ///
             /// # Panics
             ///
             /// Panics if `x` does not fit in a `u32`.
             fn child_nodes_len_from_usize(x: usize) -> Size {
                 let narrowed: u32 = u32::try_from(x)
                     // Could only panic with over 4 billion nodes
                     .expect("dirstate-v2 slice length overflow");
                 narrowed.into()
             }
             /// Converts a path length in bytes into the on-disk `PathSize`
             /// representation.
             ///
             /// # Panics
             ///
             /// Panics if `x` does not fit in a `u16`.
             fn path_len_from_usize(x: usize) -> PathSize {
                 let narrowed: u16 = u16::try_from(x)
                     // Could only panic for paths over 64 KiB
                     .expect("dirstate-v2 path length overflow");
                 narrowed.into()
             }