##// END OF EJS Templates
dirstate-v2: shrink on-disk path lengths to 16-bits...
Simon Sapin -
r48477:da1c0cd6 default
parent child Browse files
Show More
@@ -1,656 +1,678
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
7 //! struct, followed by the data file identifier.
7 //! struct, followed by the data file identifier.
8 //!
8 //!
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
10 //! file may be longer than the size given in the docket, but not shorter. Only
10 //! file may be longer than the size given in the docket, but not shorter. Only
11 //! the start of the data file up to the given size is considered. The
11 //! the start of the data file up to the given size is considered. The
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
13 //! `Root` struct is found at the end of that slice of data.
13 //! `Root` struct is found at the end of that slice of data.
14 //!
14 //!
15 //! Its `root_nodes` field contains the slice (offset and length) to
15 //! Its `root_nodes` field contains the slice (offset and length) to
16 //! the nodes representing the files and directories at the root of the
16 //! the nodes representing the files and directories at the root of the
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
19 //! nodes (if any) for nested files and directories.
19 //! nodes (if any) for nested files and directories.
20
20
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
22 use crate::dirstate_tree::path_with_basename::WithBasename;
22 use crate::dirstate_tree::path_with_basename::WithBasename;
23 use crate::errors::HgError;
23 use crate::errors::HgError;
24 use crate::utils::hg_path::HgPath;
24 use crate::utils::hg_path::HgPath;
25 use crate::DirstateEntry;
25 use crate::DirstateEntry;
26 use crate::DirstateError;
26 use crate::DirstateError;
27 use crate::DirstateParents;
27 use crate::DirstateParents;
28 use crate::EntryState;
28 use crate::EntryState;
29 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 use bytes_cast::BytesCast;
30 use bytes_cast::BytesCast;
31 use format_bytes::format_bytes;
31 use format_bytes::format_bytes;
32 use std::borrow::Cow;
32 use std::borrow::Cow;
33 use std::convert::{TryFrom, TryInto};
33 use std::convert::{TryFrom, TryInto};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
35
35
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
37 /// This is a redundant sanity check more than an actual "magic number" since
37 /// This is a redundant sanity check more than an actual "magic number" since
38 /// `.hg/requires` already governs which format should be used.
38 /// `.hg/requires` already governs which format should be used.
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
40
40
41 /// Keep space for 256-bit hashes
41 /// Keep space for 256-bit hashes
42 const STORED_NODE_ID_BYTES: usize = 32;
42 const STORED_NODE_ID_BYTES: usize = 32;
43
43
44 /// … even though only 160 bits are used for now, with SHA-1
44 /// … even though only 160 bits are used for now, with SHA-1
45 const USED_NODE_ID_BYTES: usize = 20;
45 const USED_NODE_ID_BYTES: usize = 20;
46
46
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49
49
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 #[derive(BytesCast)]
51 #[derive(BytesCast)]
52 #[repr(C)]
52 #[repr(C)]
53 struct DocketHeader {
53 struct DocketHeader {
54 marker: [u8; V2_FORMAT_MARKER.len()],
54 marker: [u8; V2_FORMAT_MARKER.len()],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
58 /// Counted in bytes
57 data_size: Size,
59 data_size: Size,
60
58 uuid_size: u8,
61 uuid_size: u8,
59 }
62 }
60
63
61 pub struct Docket<'on_disk> {
64 pub struct Docket<'on_disk> {
62 header: &'on_disk DocketHeader,
65 header: &'on_disk DocketHeader,
63 uuid: &'on_disk [u8],
66 uuid: &'on_disk [u8],
64 }
67 }
65
68
66 #[derive(BytesCast)]
69 #[derive(BytesCast)]
67 #[repr(C)]
70 #[repr(C)]
68 struct Root {
71 struct Root {
69 root_nodes: ChildNodes,
72 root_nodes: ChildNodes,
70 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
71 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
72
75
73 /// If non-zero, a hash of ignore files that were used for some previous
76 /// If non-zero, a hash of ignore files that were used for some previous
74 /// run of the `status` algorithm.
77 /// run of the `status` algorithm.
75 ///
78 ///
76 /// We define:
79 /// We define:
77 ///
80 ///
78 /// * "Root" ignore files are `.hgignore` at the root of the repository if
81 /// * "Root" ignore files are `.hgignore` at the root of the repository if
79 /// it exists, and files from `ui.ignore.*` config. This set of files is
82 /// it exists, and files from `ui.ignore.*` config. This set of files is
80 /// then sorted by the string representation of their path.
83 /// then sorted by the string representation of their path.
81 /// * The "expanded contents" of an ignore files is the byte string made
84 /// * The "expanded contents" of an ignore files is the byte string made
82 /// by concatenating its contents with the "expanded contents" of other
85 /// by concatenating its contents with the "expanded contents" of other
83 /// files included with `include:` or `subinclude:` files, in inclusion
86 /// files included with `include:` or `subinclude:` files, in inclusion
84 /// order. This definition is recursive, as included files can
87 /// order. This definition is recursive, as included files can
85 /// themselves include more files.
88 /// themselves include more files.
86 ///
89 ///
87 /// This hash is defined as the SHA-1 of the concatenation (in sorted
90 /// This hash is defined as the SHA-1 of the concatenation (in sorted
88 /// order) of the "expanded contents" of each "root" ignore file.
91 /// order) of the "expanded contents" of each "root" ignore file.
89 /// (Note that computing this does not require actually concatenating byte
92 /// (Note that computing this does not require actually concatenating byte
90 /// strings into contiguous memory, instead SHA-1 hashing can be done
93 /// strings into contiguous memory, instead SHA-1 hashing can be done
91 /// incrementally.)
94 /// incrementally.)
92 ignore_patterns_hash: IgnorePatternsHash,
95 ignore_patterns_hash: IgnorePatternsHash,
93 }
96 }
94
97
95 #[derive(BytesCast)]
98 #[derive(BytesCast)]
96 #[repr(C)]
99 #[repr(C)]
97 pub(super) struct Node {
100 pub(super) struct Node {
98 full_path: PathSlice,
101 full_path: PathSlice,
99
102
100 /// In bytes from `self.full_path.start`
103 /// In bytes from `self.full_path.start`
101 base_name_start: Size,
104 base_name_start: PathSize,
102
105
103 copy_source: OptPathSlice,
106 copy_source: OptPathSlice,
104 children: ChildNodes,
107 children: ChildNodes,
105 pub(super) descendants_with_entry_count: Size,
108 pub(super) descendants_with_entry_count: Size,
106 pub(super) tracked_descendants_count: Size,
109 pub(super) tracked_descendants_count: Size,
107
110
108 /// Depending on the value of `state`:
111 /// Depending on the value of `state`:
109 ///
112 ///
110 /// * A null byte: `data` is not used.
113 /// * A null byte: `data` is not used.
111 ///
114 ///
112 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
115 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
113 /// represent a dirstate entry like in the v1 format.
116 /// represent a dirstate entry like in the v1 format.
114 ///
117 ///
115 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
118 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
116 /// as the `Timestamp` for the mtime of a cached directory.
119 /// as the `Timestamp` for the mtime of a cached directory.
117 ///
120 ///
118 /// The presence of this state means that at some point, this path in
121 /// The presence of this state means that at some point, this path in
119 /// the working directory was observed:
122 /// the working directory was observed:
120 ///
123 ///
121 /// - To be a directory
124 /// - To be a directory
122 /// - With the modification time as given by `Timestamp`
125 /// - With the modification time as given by `Timestamp`
123 /// - That timestamp was already strictly in the past when observed,
126 /// - That timestamp was already strictly in the past when observed,
124 /// meaning that later changes cannot happen in the same clock tick
127 /// meaning that later changes cannot happen in the same clock tick
125 /// and must cause a different modification time (unless the system
128 /// and must cause a different modification time (unless the system
126 /// clock jumps back and we get unlucky, which is not impossible but
129 /// clock jumps back and we get unlucky, which is not impossible but
127 /// deemed unlikely enough).
130 /// deemed unlikely enough).
128 /// - All direct children of this directory (as returned by
131 /// - All direct children of this directory (as returned by
129 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
132 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
130 /// are ignored by ignore patterns whose hash is in
133 /// are ignored by ignore patterns whose hash is in
131 /// `Root::ignore_patterns_hash`.
134 /// `Root::ignore_patterns_hash`.
132 ///
135 ///
133 /// This means that if `std::fs::symlink_metadata` later reports the
136 /// This means that if `std::fs::symlink_metadata` later reports the
134 /// same modification time and ignored patterns haven’t changed, a run
137 /// same modification time and ignored patterns haven’t changed, a run
135 /// of status that is not listing ignored files can skip calling
138 /// of status that is not listing ignored files can skip calling
136 /// `std::fs::read_dir` again for this directory, and iterate child
139 /// `std::fs::read_dir` again for this directory, and iterate child
137 /// dirstate nodes instead.
140 /// dirstate nodes instead.
138 state: u8,
141 state: u8,
139 data: Entry,
142 data: Entry,
140 }
143 }
141
144
142 #[derive(BytesCast, Copy, Clone)]
145 #[derive(BytesCast, Copy, Clone)]
143 #[repr(C)]
146 #[repr(C)]
144 struct Entry {
147 struct Entry {
145 mode: I32Be,
148 mode: I32Be,
146 mtime: I32Be,
149 mtime: I32Be,
147 size: I32Be,
150 size: I32Be,
148 }
151 }
149
152
150 /// Duration since the Unix epoch
153 /// Duration since the Unix epoch
151 #[derive(BytesCast, Copy, Clone, PartialEq)]
154 #[derive(BytesCast, Copy, Clone, PartialEq)]
152 #[repr(C)]
155 #[repr(C)]
153 pub(super) struct Timestamp {
156 pub(super) struct Timestamp {
154 seconds: I64Be,
157 seconds: I64Be,
155
158
156 /// In `0 .. 1_000_000_000`.
159 /// In `0 .. 1_000_000_000`.
157 ///
160 ///
158 /// This timestamp is later or earlier than `(seconds, 0)` by this many
161 /// This timestamp is later or earlier than `(seconds, 0)` by this many
159 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
162 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
160 nanoseconds: U32Be,
163 nanoseconds: U32Be,
161 }
164 }
162
165
163 /// Counted in bytes from the start of the file
166 /// Counted in bytes from the start of the file
164 ///
167 ///
165 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
168 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
166 type Offset = U32Be;
169 type Offset = U32Be;
167
170
168 /// Counted in number of items
171 /// Counted in number of items
169 ///
172 ///
170 /// NOTE: not supporting directories with more than 4 billion direct children,
173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
171 /// or filenames more than 4 GiB.
172 type Size = U32Be;
174 type Size = U32Be;
173
175
174 /// Location of consecutive, fixed-size items.
176 /// Counted in bytes
175 ///
177 ///
176 /// An item can be a single byte for paths, or a struct with
178 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
177 /// `derive(BytesCast)`.
179 type PathSize = U16Be;
178 #[derive(BytesCast, Copy, Clone)]
179 #[repr(C)]
180 struct Slice {
181 start: Offset,
182 len: Size,
183 }
184
180
185 /// A contiguous sequence of `len` times `Node`, representing the child nodes
181 /// A contiguous sequence of `len` times `Node`, representing the child nodes
186 /// of either some other node or of the repository root.
182 /// of either some other node or of the repository root.
187 ///
183 ///
188 /// Always sorted by ascending `full_path`, to allow binary search.
184 /// Always sorted by ascending `full_path`, to allow binary search.
189 /// Since nodes with the same parent nodes also have the same parent path,
185 /// Since nodes with the same parent nodes also have the same parent path,
190 /// only the `base_name`s need to be compared during binary search.
186 /// only the `base_name`s need to be compared during binary search.
191 type ChildNodes = Slice;
187 #[derive(BytesCast, Copy, Clone)]
188 #[repr(C)]
189 struct ChildNodes {
190 start: Offset,
191 len: Size,
192 }
192
193
193 /// A `HgPath` of `len` bytes
194 /// A `HgPath` of `len` bytes
194 type PathSlice = Slice;
195 #[derive(BytesCast, Copy, Clone)]
196 #[repr(C)]
197 struct PathSlice {
198 start: Offset,
199 len: PathSize,
200 }
195
201
196 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
197 type OptPathSlice = Slice;
203 type OptPathSlice = PathSlice;
198
204
199 /// Make sure that size-affecting changes are made knowingly
205 /// Make sure that size-affecting changes are made knowingly
200 fn _static_assert_size_of() {
206 fn _static_assert_size_of() {
201 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
202 let _ = std::mem::transmute::<Root, [u8; 36]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
203 let _ = std::mem::transmute::<Node, [u8; 49]>;
209 let _ = std::mem::transmute::<Node, [u8; 43]>;
204 }
210 }
205
211
206 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
207 ///
213 ///
208 /// This should only happen if Mercurial is buggy or a repository is corrupted.
214 /// This should only happen if Mercurial is buggy or a repository is corrupted.
209 #[derive(Debug)]
215 #[derive(Debug)]
210 pub struct DirstateV2ParseError;
216 pub struct DirstateV2ParseError;
211
217
212 impl From<DirstateV2ParseError> for HgError {
218 impl From<DirstateV2ParseError> for HgError {
213 fn from(_: DirstateV2ParseError) -> Self {
219 fn from(_: DirstateV2ParseError) -> Self {
214 HgError::corrupted("dirstate-v2 parse error")
220 HgError::corrupted("dirstate-v2 parse error")
215 }
221 }
216 }
222 }
217
223
218 impl From<DirstateV2ParseError> for crate::DirstateError {
224 impl From<DirstateV2ParseError> for crate::DirstateError {
219 fn from(error: DirstateV2ParseError) -> Self {
225 fn from(error: DirstateV2ParseError) -> Self {
220 HgError::from(error).into()
226 HgError::from(error).into()
221 }
227 }
222 }
228 }
223
229
224 impl<'on_disk> Docket<'on_disk> {
230 impl<'on_disk> Docket<'on_disk> {
225 pub fn parents(&self) -> DirstateParents {
231 pub fn parents(&self) -> DirstateParents {
226 use crate::Node;
232 use crate::Node;
227 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
233 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
228 .unwrap()
234 .unwrap()
229 .clone();
235 .clone();
230 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
236 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
231 .unwrap()
237 .unwrap()
232 .clone();
238 .clone();
233 DirstateParents { p1, p2 }
239 DirstateParents { p1, p2 }
234 }
240 }
235
241
236 pub fn data_size(&self) -> usize {
242 pub fn data_size(&self) -> usize {
237 // This `unwrap` could only panic on a 16-bit CPU
243 // This `unwrap` could only panic on a 16-bit CPU
238 self.header.data_size.get().try_into().unwrap()
244 self.header.data_size.get().try_into().unwrap()
239 }
245 }
240
246
241 pub fn data_filename(&self) -> String {
247 pub fn data_filename(&self) -> String {
242 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
248 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
243 }
249 }
244 }
250 }
245
251
246 pub fn read_docket(
252 pub fn read_docket(
247 on_disk: &[u8],
253 on_disk: &[u8],
248 ) -> Result<Docket<'_>, DirstateV2ParseError> {
254 ) -> Result<Docket<'_>, DirstateV2ParseError> {
249 let (header, uuid) =
255 let (header, uuid) =
250 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
256 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
251 let uuid_size = header.uuid_size as usize;
257 let uuid_size = header.uuid_size as usize;
252 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
258 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
253 Ok(Docket { header, uuid })
259 Ok(Docket { header, uuid })
254 } else {
260 } else {
255 Err(DirstateV2ParseError)
261 Err(DirstateV2ParseError)
256 }
262 }
257 }
263 }
258
264
259 fn read_root<'on_disk>(
265 fn read_root<'on_disk>(
260 on_disk: &'on_disk [u8],
266 on_disk: &'on_disk [u8],
261 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
267 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
262 // Find the `Root` at the end of the given slice
268 // Find the `Root` at the end of the given slice
263 let root_offset = on_disk
269 let root_offset = on_disk
264 .len()
270 .len()
265 .checked_sub(std::mem::size_of::<Root>())
271 .checked_sub(std::mem::size_of::<Root>())
266 // A non-empty slice too short is an error
272 // A non-empty slice too short is an error
267 .ok_or(DirstateV2ParseError)?;
273 .ok_or(DirstateV2ParseError)?;
268 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
274 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
269 .map_err(|_| DirstateV2ParseError)?;
275 .map_err(|_| DirstateV2ParseError)?;
270 Ok(root)
276 Ok(root)
271 }
277 }
272
278
273 pub(super) fn read<'on_disk>(
279 pub(super) fn read<'on_disk>(
274 on_disk: &'on_disk [u8],
280 on_disk: &'on_disk [u8],
275 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
281 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
276 if on_disk.is_empty() {
282 if on_disk.is_empty() {
277 return Ok(DirstateMap::empty(on_disk));
283 return Ok(DirstateMap::empty(on_disk));
278 }
284 }
279 let root = read_root(on_disk)?;
285 let root = read_root(on_disk)?;
280 let dirstate_map = DirstateMap {
286 let dirstate_map = DirstateMap {
281 on_disk,
287 on_disk,
282 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
283 on_disk,
289 on_disk,
284 root.root_nodes,
290 root.root_nodes,
285 )?),
291 )?),
286 nodes_with_entry_count: root.nodes_with_entry_count.get(),
292 nodes_with_entry_count: root.nodes_with_entry_count.get(),
287 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
293 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
288 ignore_patterns_hash: root.ignore_patterns_hash,
294 ignore_patterns_hash: root.ignore_patterns_hash,
289 };
295 };
290 Ok(dirstate_map)
296 Ok(dirstate_map)
291 }
297 }
292
298
293 impl Node {
299 impl Node {
294 pub(super) fn full_path<'on_disk>(
300 pub(super) fn full_path<'on_disk>(
295 &self,
301 &self,
296 on_disk: &'on_disk [u8],
302 on_disk: &'on_disk [u8],
297 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
303 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
298 read_hg_path(on_disk, self.full_path)
304 read_hg_path(on_disk, self.full_path)
299 }
305 }
300
306
301 pub(super) fn base_name_start<'on_disk>(
307 pub(super) fn base_name_start<'on_disk>(
302 &self,
308 &self,
303 ) -> Result<usize, DirstateV2ParseError> {
309 ) -> Result<usize, DirstateV2ParseError> {
304 let start = self.base_name_start.get();
310 let start = self.base_name_start.get();
305 if start < self.full_path.len.get() {
311 if start < self.full_path.len.get() {
306 let start = usize::try_from(start)
312 let start = usize::try_from(start)
307 // u32 -> usize, could only panic on a 16-bit CPU
313 // u32 -> usize, could only panic on a 16-bit CPU
308 .expect("dirstate-v2 base_name_start out of bounds");
314 .expect("dirstate-v2 base_name_start out of bounds");
309 Ok(start)
315 Ok(start)
310 } else {
316 } else {
311 Err(DirstateV2ParseError)
317 Err(DirstateV2ParseError)
312 }
318 }
313 }
319 }
314
320
315 pub(super) fn base_name<'on_disk>(
321 pub(super) fn base_name<'on_disk>(
316 &self,
322 &self,
317 on_disk: &'on_disk [u8],
323 on_disk: &'on_disk [u8],
318 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
324 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
319 let full_path = self.full_path(on_disk)?;
325 let full_path = self.full_path(on_disk)?;
320 let base_name_start = self.base_name_start()?;
326 let base_name_start = self.base_name_start()?;
321 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
327 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
322 }
328 }
323
329
324 pub(super) fn path<'on_disk>(
330 pub(super) fn path<'on_disk>(
325 &self,
331 &self,
326 on_disk: &'on_disk [u8],
332 on_disk: &'on_disk [u8],
327 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
333 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
328 Ok(WithBasename::from_raw_parts(
334 Ok(WithBasename::from_raw_parts(
329 Cow::Borrowed(self.full_path(on_disk)?),
335 Cow::Borrowed(self.full_path(on_disk)?),
330 self.base_name_start()?,
336 self.base_name_start()?,
331 ))
337 ))
332 }
338 }
333
339
334 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
340 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
335 self.copy_source.start.get() != 0
341 self.copy_source.start.get() != 0
336 }
342 }
337
343
338 pub(super) fn copy_source<'on_disk>(
344 pub(super) fn copy_source<'on_disk>(
339 &self,
345 &self,
340 on_disk: &'on_disk [u8],
346 on_disk: &'on_disk [u8],
341 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
347 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
342 Ok(if self.has_copy_source() {
348 Ok(if self.has_copy_source() {
343 Some(read_hg_path(on_disk, self.copy_source)?)
349 Some(read_hg_path(on_disk, self.copy_source)?)
344 } else {
350 } else {
345 None
351 None
346 })
352 })
347 }
353 }
348
354
349 pub(super) fn node_data(
355 pub(super) fn node_data(
350 &self,
356 &self,
351 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
357 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
352 let entry = |state| {
358 let entry = |state| {
353 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
359 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
354 };
360 };
355
361
356 match self.state {
362 match self.state {
357 b'\0' => Ok(dirstate_map::NodeData::None),
363 b'\0' => Ok(dirstate_map::NodeData::None),
358 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
364 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
359 mtime: *self.data.as_timestamp(),
365 mtime: *self.data.as_timestamp(),
360 }),
366 }),
361 b'n' => Ok(entry(EntryState::Normal)),
367 b'n' => Ok(entry(EntryState::Normal)),
362 b'a' => Ok(entry(EntryState::Added)),
368 b'a' => Ok(entry(EntryState::Added)),
363 b'r' => Ok(entry(EntryState::Removed)),
369 b'r' => Ok(entry(EntryState::Removed)),
364 b'm' => Ok(entry(EntryState::Merged)),
370 b'm' => Ok(entry(EntryState::Merged)),
365 _ => Err(DirstateV2ParseError),
371 _ => Err(DirstateV2ParseError),
366 }
372 }
367 }
373 }
368
374
369 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
375 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
370 if self.state == b'd' {
376 if self.state == b'd' {
371 Some(self.data.as_timestamp())
377 Some(self.data.as_timestamp())
372 } else {
378 } else {
373 None
379 None
374 }
380 }
375 }
381 }
376
382
377 pub(super) fn state(
383 pub(super) fn state(
378 &self,
384 &self,
379 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
385 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
380 match self.state {
386 match self.state {
381 b'\0' | b'd' => Ok(None),
387 b'\0' | b'd' => Ok(None),
382 b'n' => Ok(Some(EntryState::Normal)),
388 b'n' => Ok(Some(EntryState::Normal)),
383 b'a' => Ok(Some(EntryState::Added)),
389 b'a' => Ok(Some(EntryState::Added)),
384 b'r' => Ok(Some(EntryState::Removed)),
390 b'r' => Ok(Some(EntryState::Removed)),
385 b'm' => Ok(Some(EntryState::Merged)),
391 b'm' => Ok(Some(EntryState::Merged)),
386 _ => Err(DirstateV2ParseError),
392 _ => Err(DirstateV2ParseError),
387 }
393 }
388 }
394 }
389
395
390 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
396 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
391 DirstateEntry {
397 DirstateEntry {
392 state,
398 state,
393 mode: self.data.mode.get(),
399 mode: self.data.mode.get(),
394 mtime: self.data.mtime.get(),
400 mtime: self.data.mtime.get(),
395 size: self.data.size.get(),
401 size: self.data.size.get(),
396 }
402 }
397 }
403 }
398
404
399 pub(super) fn entry(
405 pub(super) fn entry(
400 &self,
406 &self,
401 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
407 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
402 Ok(self
408 Ok(self
403 .state()?
409 .state()?
404 .map(|state| self.entry_with_given_state(state)))
410 .map(|state| self.entry_with_given_state(state)))
405 }
411 }
406
412
407 pub(super) fn children<'on_disk>(
413 pub(super) fn children<'on_disk>(
408 &self,
414 &self,
409 on_disk: &'on_disk [u8],
415 on_disk: &'on_disk [u8],
410 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
416 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
411 read_slice::<Node>(on_disk, self.children)
417 read_nodes(on_disk, self.children)
412 }
418 }
413
419
414 pub(super) fn to_in_memory_node<'on_disk>(
420 pub(super) fn to_in_memory_node<'on_disk>(
415 &self,
421 &self,
416 on_disk: &'on_disk [u8],
422 on_disk: &'on_disk [u8],
417 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
423 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
418 Ok(dirstate_map::Node {
424 Ok(dirstate_map::Node {
419 children: dirstate_map::ChildNodes::OnDisk(
425 children: dirstate_map::ChildNodes::OnDisk(
420 self.children(on_disk)?,
426 self.children(on_disk)?,
421 ),
427 ),
422 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
428 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
423 data: self.node_data()?,
429 data: self.node_data()?,
424 descendants_with_entry_count: self
430 descendants_with_entry_count: self
425 .descendants_with_entry_count
431 .descendants_with_entry_count
426 .get(),
432 .get(),
427 tracked_descendants_count: self.tracked_descendants_count.get(),
433 tracked_descendants_count: self.tracked_descendants_count.get(),
428 })
434 })
429 }
435 }
430 }
436 }
431
437
432 impl Entry {
438 impl Entry {
433 fn from_timestamp(timestamp: Timestamp) -> Self {
439 fn from_timestamp(timestamp: Timestamp) -> Self {
434 // Safety: both types implement the `ByteCast` trait, so we could
440 // Safety: both types implement the `ByteCast` trait, so we could
435 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
441 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
436 // `transmute` instead makes the compiler check that the two types
442 // `transmute` instead makes the compiler check that the two types
437 // have the same size, which eliminates the error case of
443 // have the same size, which eliminates the error case of
438 // `from_bytes`.
444 // `from_bytes`.
439 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
445 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
440 }
446 }
441
447
442 fn as_timestamp(&self) -> &Timestamp {
448 fn as_timestamp(&self) -> &Timestamp {
443 // Safety: same as above in `from_timestamp`
449 // Safety: same as above in `from_timestamp`
444 unsafe { &*(self as *const Entry as *const Timestamp) }
450 unsafe { &*(self as *const Entry as *const Timestamp) }
445 }
451 }
446 }
452 }
447
453
448 impl Timestamp {
454 impl Timestamp {
449 pub fn seconds(&self) -> i64 {
455 pub fn seconds(&self) -> i64 {
450 self.seconds.get()
456 self.seconds.get()
451 }
457 }
452 }
458 }
453
459
454 impl From<SystemTime> for Timestamp {
460 impl From<SystemTime> for Timestamp {
455 fn from(system_time: SystemTime) -> Self {
461 fn from(system_time: SystemTime) -> Self {
456 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
462 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
457 Ok(duration) => {
463 Ok(duration) => {
458 (duration.as_secs() as i64, duration.subsec_nanos())
464 (duration.as_secs() as i64, duration.subsec_nanos())
459 }
465 }
460 Err(error) => {
466 Err(error) => {
461 let negative = error.duration();
467 let negative = error.duration();
462 (-(negative.as_secs() as i64), negative.subsec_nanos())
468 (-(negative.as_secs() as i64), negative.subsec_nanos())
463 }
469 }
464 };
470 };
465 Timestamp {
471 Timestamp {
466 seconds: secs.into(),
472 seconds: secs.into(),
467 nanoseconds: nanos.into(),
473 nanoseconds: nanos.into(),
468 }
474 }
469 }
475 }
470 }
476 }
471
477
472 impl From<&'_ Timestamp> for SystemTime {
478 impl From<&'_ Timestamp> for SystemTime {
473 fn from(timestamp: &'_ Timestamp) -> Self {
479 fn from(timestamp: &'_ Timestamp) -> Self {
474 let secs = timestamp.seconds.get();
480 let secs = timestamp.seconds.get();
475 let nanos = timestamp.nanoseconds.get();
481 let nanos = timestamp.nanoseconds.get();
476 if secs >= 0 {
482 if secs >= 0 {
477 UNIX_EPOCH + Duration::new(secs as u64, nanos)
483 UNIX_EPOCH + Duration::new(secs as u64, nanos)
478 } else {
484 } else {
479 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
485 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
480 }
486 }
481 }
487 }
482 }
488 }
483
489
484 fn read_hg_path(
490 fn read_hg_path(
485 on_disk: &[u8],
491 on_disk: &[u8],
486 slice: Slice,
492 slice: PathSlice,
487 ) -> Result<&HgPath, DirstateV2ParseError> {
493 ) -> Result<&HgPath, DirstateV2ParseError> {
488 let bytes = read_slice::<u8>(on_disk, slice)?;
494 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
489 Ok(HgPath::new(bytes))
490 }
495 }
491
496
492 fn read_slice<T>(
497 fn read_nodes(
493 on_disk: &[u8],
498 on_disk: &[u8],
494 slice: Slice,
499 slice: ChildNodes,
500 ) -> Result<&[Node], DirstateV2ParseError> {
501 read_slice(on_disk, slice.start, slice.len.get())
502 }
503
504 fn read_slice<T, Len>(
505 on_disk: &[u8],
506 start: Offset,
507 len: Len,
495 ) -> Result<&[T], DirstateV2ParseError>
508 ) -> Result<&[T], DirstateV2ParseError>
496 where
509 where
497 T: BytesCast,
510 T: BytesCast,
511 Len: TryInto<usize>,
498 {
512 {
499 // Either `usize::MAX` would result in "out of bounds" error since a single
513 // Either `usize::MAX` would result in "out of bounds" error since a single
500 // `&[u8]` cannot occupy the entire addess space.
514 // `&[u8]` cannot occupy the entire addess space.
501 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
515 let start = start.get().try_into().unwrap_or(std::usize::MAX);
502 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
516 let len = len.try_into().unwrap_or(std::usize::MAX);
503 on_disk
517 on_disk
504 .get(start..)
518 .get(start..)
505 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
519 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
506 .map(|(slice, _rest)| slice)
520 .map(|(slice, _rest)| slice)
507 .ok_or_else(|| DirstateV2ParseError)
521 .ok_or_else(|| DirstateV2ParseError)
508 }
522 }
509
523
510 pub(crate) fn for_each_tracked_path<'on_disk>(
524 pub(crate) fn for_each_tracked_path<'on_disk>(
511 on_disk: &'on_disk [u8],
525 on_disk: &'on_disk [u8],
512 mut f: impl FnMut(&'on_disk HgPath),
526 mut f: impl FnMut(&'on_disk HgPath),
513 ) -> Result<(), DirstateV2ParseError> {
527 ) -> Result<(), DirstateV2ParseError> {
514 let root = read_root(on_disk)?;
528 let root = read_root(on_disk)?;
515 fn recur<'on_disk>(
529 fn recur<'on_disk>(
516 on_disk: &'on_disk [u8],
530 on_disk: &'on_disk [u8],
517 nodes: Slice,
531 nodes: ChildNodes,
518 f: &mut impl FnMut(&'on_disk HgPath),
532 f: &mut impl FnMut(&'on_disk HgPath),
519 ) -> Result<(), DirstateV2ParseError> {
533 ) -> Result<(), DirstateV2ParseError> {
520 for node in read_slice::<Node>(on_disk, nodes)? {
534 for node in read_nodes(on_disk, nodes)? {
521 if let Some(state) = node.state()? {
535 if let Some(state) = node.state()? {
522 if state.is_tracked() {
536 if state.is_tracked() {
523 f(node.full_path(on_disk)?)
537 f(node.full_path(on_disk)?)
524 }
538 }
525 }
539 }
526 recur(on_disk, node.children, f)?
540 recur(on_disk, node.children, f)?
527 }
541 }
528 Ok(())
542 Ok(())
529 }
543 }
530 recur(on_disk, root.root_nodes, &mut f)
544 recur(on_disk, root.root_nodes, &mut f)
531 }
545 }
532
546
533 pub(super) fn write(
547 pub(super) fn write(
534 dirstate_map: &mut DirstateMap,
548 dirstate_map: &mut DirstateMap,
535 ) -> Result<Vec<u8>, DirstateError> {
549 ) -> Result<Vec<u8>, DirstateError> {
536 let root_len = std::mem::size_of::<Root>();
550 let root_len = std::mem::size_of::<Root>();
537
551
538 // This ignores the space for paths, and for nodes without an entry.
552 // This ignores the space for paths, and for nodes without an entry.
539 // TODO: better estimate? Skip the `Vec` and write to a file directly?
553 // TODO: better estimate? Skip the `Vec` and write to a file directly?
540 let size_guess = root_len
554 let size_guess = root_len
541 + std::mem::size_of::<Node>()
555 + std::mem::size_of::<Node>()
542 * dirstate_map.nodes_with_entry_count as usize;
556 * dirstate_map.nodes_with_entry_count as usize;
543 let mut out = Vec::with_capacity(size_guess);
557 let mut out = Vec::with_capacity(size_guess);
544
558
545 let root_nodes =
559 let root_nodes =
546 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
560 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
547
561
548 let root = Root {
562 let root = Root {
549 root_nodes,
563 root_nodes,
550 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
564 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
551 nodes_with_copy_source_count: dirstate_map
565 nodes_with_copy_source_count: dirstate_map
552 .nodes_with_copy_source_count
566 .nodes_with_copy_source_count
553 .into(),
567 .into(),
554 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
568 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
555 };
569 };
556 out.extend(root.as_bytes());
570 out.extend(root.as_bytes());
557 Ok(out)
571 Ok(out)
558 }
572 }
559
573
560 fn write_nodes(
574 fn write_nodes(
561 dirstate_map: &DirstateMap,
575 dirstate_map: &DirstateMap,
562 nodes: dirstate_map::ChildNodesRef,
576 nodes: dirstate_map::ChildNodesRef,
563 out: &mut Vec<u8>,
577 out: &mut Vec<u8>,
564 ) -> Result<ChildNodes, DirstateError> {
578 ) -> Result<ChildNodes, DirstateError> {
565 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
566 // order. Sort to enable binary search in the written file.
580 // order. Sort to enable binary search in the written file.
567 let nodes = nodes.sorted();
581 let nodes = nodes.sorted();
582 let nodes_len = nodes.len();
568
583
569 // First accumulate serialized nodes in a `Vec`
584 // First accumulate serialized nodes in a `Vec`
570 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
571 for node in nodes {
586 for node in nodes {
572 let children = write_nodes(
587 let children = write_nodes(
573 dirstate_map,
588 dirstate_map,
574 node.children(dirstate_map.on_disk)?,
589 node.children(dirstate_map.on_disk)?,
575 out,
590 out,
576 )?;
591 )?;
577 let full_path = node.full_path(dirstate_map.on_disk)?;
592 let full_path = node.full_path(dirstate_map.on_disk)?;
578 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
593 let full_path = write_path(full_path.as_bytes(), out);
579 let copy_source =
594 let copy_source =
580 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
581 write_slice::<u8>(source.as_bytes(), out)
596 write_path(source.as_bytes(), out)
582 } else {
597 } else {
583 Slice {
598 PathSlice {
584 start: 0.into(),
599 start: 0.into(),
585 len: 0.into(),
600 len: 0.into(),
586 }
601 }
587 };
602 };
588 on_disk_nodes.push(match node {
603 on_disk_nodes.push(match node {
589 NodeRef::InMemory(path, node) => {
604 NodeRef::InMemory(path, node) => {
590 let (state, data) = match &node.data {
605 let (state, data) = match &node.data {
591 dirstate_map::NodeData::Entry(entry) => (
606 dirstate_map::NodeData::Entry(entry) => (
592 entry.state.into(),
607 entry.state.into(),
593 Entry {
608 Entry {
594 mode: entry.mode.into(),
609 mode: entry.mode.into(),
595 mtime: entry.mtime.into(),
610 mtime: entry.mtime.into(),
596 size: entry.size.into(),
611 size: entry.size.into(),
597 },
612 },
598 ),
613 ),
599 dirstate_map::NodeData::CachedDirectory { mtime } => {
614 dirstate_map::NodeData::CachedDirectory { mtime } => {
600 (b'd', Entry::from_timestamp(*mtime))
615 (b'd', Entry::from_timestamp(*mtime))
601 }
616 }
602 dirstate_map::NodeData::None => (
617 dirstate_map::NodeData::None => (
603 b'\0',
618 b'\0',
604 Entry {
619 Entry {
605 mode: 0.into(),
620 mode: 0.into(),
606 mtime: 0.into(),
621 mtime: 0.into(),
607 size: 0.into(),
622 size: 0.into(),
608 },
623 },
609 ),
624 ),
610 };
625 };
611 Node {
626 Node {
612 children,
627 children,
613 copy_source,
628 copy_source,
614 full_path,
629 full_path,
615 base_name_start: u32::try_from(path.base_name_start())
630 base_name_start: u16::try_from(path.base_name_start())
616 // Could only panic for paths over 4 GiB
631 // Could only panic for paths over 64 KiB
617 .expect("dirstate-v2 offset overflow")
632 .expect("dirstate-v2 path length overflow")
618 .into(),
633 .into(),
619 descendants_with_entry_count: node
634 descendants_with_entry_count: node
620 .descendants_with_entry_count
635 .descendants_with_entry_count
621 .into(),
636 .into(),
622 tracked_descendants_count: node
637 tracked_descendants_count: node
623 .tracked_descendants_count
638 .tracked_descendants_count
624 .into(),
639 .into(),
625 state,
640 state,
626 data,
641 data,
627 }
642 }
628 }
643 }
629 NodeRef::OnDisk(node) => Node {
644 NodeRef::OnDisk(node) => Node {
630 children,
645 children,
631 copy_source,
646 copy_source,
632 full_path,
647 full_path,
633 ..*node
648 ..*node
634 },
649 },
635 })
650 })
636 }
651 }
637 // … so we can write them contiguously
652 // … so we can write them contiguously, after writing everything else they
638 Ok(write_slice::<Node>(&on_disk_nodes, out))
653 // refer to.
654 let start = current_offset(out);
655 let len = u32::try_from(nodes_len)
656 // Could only panic with over 4 billion nodes
657 .expect("dirstate-v2 path length overflow")
658 .into();
659 out.extend(on_disk_nodes.as_bytes());
660 Ok(ChildNodes { start, len })
639 }
661 }
640
662
641 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
663 fn current_offset(out: &Vec<u8>) -> Offset {
642 where
664 u32::try_from(out.len())
643 T: BytesCast,
644 {
645 let start = u32::try_from(out.len())
646 // Could only panic for a dirstate file larger than 4 GiB
665 // Could only panic for a dirstate file larger than 4 GiB
647 .expect("dirstate-v2 offset overflow")
666 .expect("dirstate-v2 offset overflow")
648 .into();
667 .into()
649 let len = u32::try_from(slice.len())
668 }
650 // Could only panic for paths over 4 GiB or nodes with over 4 billions
669
651 // child nodes
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
652 .expect("dirstate-v2 offset overflow")
671 let start = current_offset(out);
672 let len = u16::try_from(slice.len())
673 // Could only panic for paths over 64 KiB
674 .expect("dirstate-v2 path length overflow")
653 .into();
675 .into();
654 out.extend(slice.as_bytes());
676 out.extend(slice.as_bytes());
655 Slice { start, len }
677 PathSlice { start, len }
656 }
678 }
General Comments 0
You need to be logged in to leave comments. Login now