upstream/mercurial-mirror Commit - r50441:07b9ca52

1

# v2.py - Pure-Python implementation of the dirstate-v2 file format

1

# v2.py - Pure-Python implementation of the dirstate-v2 file format

2

#

2

#

3

# Copyright Mercurial Contributors

3

# Copyright Mercurial Contributors

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

9

import struct

9

import struct

10

11

from ..thirdparty import attr

11

from ..thirdparty import attr

12

from .. import error, policy

12

from .. import error, policy

13

14

parsers = policy.importmod('parsers')

14

parsers = policy.importmod('parsers')

15

16

17

# Must match the constant of the same name in

17

# Must match the constant of the same name in

18

# `rust/hg-core/src/dirstate_tree/on_disk.rs`

18

# `rust/hg-core/src/dirstate_tree/on_disk.rs`

19

TREE_METADATA_SIZE = 44

19

TREE_METADATA_SIZE = 44

20

NODE_SIZE = 44

20

NODE_SIZE = 44

21

22

23

# Must match the `TreeMetadata` Rust struct in

23

# Must match the `TreeMetadata` Rust struct in

24

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

24

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

25

#

25

#

26

# * 4 bytes: start offset of root nodes

26

# * 4 bytes: start offset of root nodes

27

# * 4 bytes: number of root nodes

27

# * 4 bytes: number of root nodes

28

# * 4 bytes: total number of nodes in the tree that have an entry

28

# * 4 bytes: total number of nodes in the tree that have an entry

29

# * 4 bytes: total number of nodes in the tree that have a copy source

29

# * 4 bytes: total number of nodes in the tree that have a copy source

30

# * 4 bytes: number of bytes in the data file that are not used anymore

30

# * 4 bytes: number of bytes in the data file that are not used anymore

31

# * 4 bytes: unused

31

# * 4 bytes: unused

32

# * 20 bytes: SHA-1 hash of ignore patterns

32

# * 20 bytes: SHA-1 hash of ignore patterns

33

TREE_METADATA = struct.Struct('>LLLLL4s20s')

33

TREE_METADATA = struct.Struct('>LLLLL4s20s')

34

35

36

# Must match the `Node` Rust struct in

36

# Must match the `Node` Rust struct in

37

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

37

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

38

#

38

#

39

# * 4 bytes: start offset of full path

39

# * 4 bytes: start offset of full path

40

# * 2 bytes: length of the full path

40

# * 2 bytes: length of the full path

41

# * 2 bytes: length within the full path before its "base name"

41

# * 2 bytes: length within the full path before its "base name"

42

# * 4 bytes: start offset of the copy source if any, or zero for no copy source

42

# * 4 bytes: start offset of the copy source if any, or zero for no copy source

43

# * 2 bytes: length of the copy source if any, or unused

43

# * 2 bytes: length of the copy source if any, or unused

44

# * 4 bytes: start offset of child nodes

44

# * 4 bytes: start offset of child nodes

45

# * 4 bytes: number of child nodes

45

# * 4 bytes: number of child nodes

46

# * 4 bytes: number of descendant nodes that have an entry

46

# * 4 bytes: number of descendant nodes that have an entry

47

# * 4 bytes: number of descendant nodes that have a "tracked" state

47

# * 4 bytes: number of descendant nodes that have a "tracked" state

48

# * 1 byte: flags

48

# * 1 byte: flags

49

# * 4 bytes: expected size

49

# * 4 bytes: expected size

50

# * 4 bytes: mtime seconds

50

# * 4 bytes: mtime seconds

51

# * 4 bytes: mtime nanoseconds

51

# * 4 bytes: mtime nanoseconds

52

NODE = struct.Struct('>LHHLHLLLLHlll')

52

NODE = struct.Struct('>LHHLHLLLLHlll')

53

54

55

assert TREE_METADATA_SIZE == TREE_METADATA.size

55

assert TREE_METADATA_SIZE == TREE_METADATA.size

56

assert NODE_SIZE == NODE.size

56

assert NODE_SIZE == NODE.size

57

58

# match constant in mercurial/pure/parsers.py

58

# match constant in mercurial/pure/parsers.py

59

DIRSTATE_V2_DIRECTORY = 1 << 13

59

DIRSTATE_V2_DIRECTORY = 1 << 13

60

61

62

def parse_dirstate(map, copy_map, data, tree_metadata):

62

def parse_dirstate(map, copy_map, data, tree_metadata):

63

"""parse a full v2-dirstate from a binary data into dictionnaries:

63

"""parse a full v2-dirstate from a binary data into dictionaries:

64

65

- map: a {path: entry} mapping that will be filled

65

- map: a {path: entry} mapping that will be filled

66

- copy_map: a {path: copy-source} mapping that will be filled

66

- copy_map: a {path: copy-source} mapping that will be filled

67

- data: a binary blob contains v2 nodes data

67

- data: a binary blob contains v2 nodes data

68

- tree_metadata:: a binary blob of the top level node (from the docket)

68

- tree_metadata:: a binary blob of the top level node (from the docket)

69

"""

69

"""

70

(

70

(

71

root_nodes_start,

71

root_nodes_start,

72

root_nodes_len,

72

root_nodes_len,

73

_nodes_with_entry_count,

73

_nodes_with_entry_count,

74

_nodes_with_copy_source_count,

74

_nodes_with_copy_source_count,

75

_unreachable_bytes,

75

_unreachable_bytes,

76

_unused,

76

_unused,

77

_ignore_patterns_hash,

77

_ignore_patterns_hash,

78

) = TREE_METADATA.unpack(tree_metadata)

78

) = TREE_METADATA.unpack(tree_metadata)

79

parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)

79

parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)

80

81

82

def parse_nodes(map, copy_map, data, start, len):

82

def parse_nodes(map, copy_map, data, start, len):

83

"""parse <len> nodes from <data> starting at offset <start>

83

"""parse <len> nodes from <data> starting at offset <start>

84

85

This is used by parse_dirstate to recursively fill `map` and `copy_map`.

85

This is used by parse_dirstate to recursively fill `map` and `copy_map`.

86

87

All directory specific information is ignored and do not need any

87

All directory specific information is ignored and do not need any

88

processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)

88

processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)

89

"""

89

"""

90

for i in range(len):

90

for i in range(len):

91

node_start = start + NODE_SIZE * i

91

node_start = start + NODE_SIZE * i

92

node_bytes = slice_with_len(data, node_start, NODE_SIZE)

92

node_bytes = slice_with_len(data, node_start, NODE_SIZE)

93

(

93

(

94

path_start,

94

path_start,

95

path_len,

95

path_len,

96

_basename_start,

96

_basename_start,

97

copy_source_start,

97

copy_source_start,

98

copy_source_len,

98

copy_source_len,

99

children_start,

99

children_start,

100

children_count,

100

children_count,

101

_descendants_with_entry_count,

101

_descendants_with_entry_count,

102

_tracked_descendants_count,

102

_tracked_descendants_count,

103

flags,

103

flags,

104

size,

104

size,

105

mtime_s,

105

mtime_s,

106

mtime_ns,

106

mtime_ns,

107

) = NODE.unpack(node_bytes)

107

) = NODE.unpack(node_bytes)

108

109

# Parse child nodes of this node recursively

109

# Parse child nodes of this node recursively

110

parse_nodes(map, copy_map, data, children_start, children_count)

110

parse_nodes(map, copy_map, data, children_start, children_count)

111

112

item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)

112

item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)

113

if not item.any_tracked:

113

if not item.any_tracked:

114

continue

114

continue

115

path = slice_with_len(data, path_start, path_len)

115

path = slice_with_len(data, path_start, path_len)

116

map[path] = item

116

map[path] = item

117

if copy_source_start:

117

if copy_source_start:

118

copy_map[path] = slice_with_len(

118

copy_map[path] = slice_with_len(

119

data, copy_source_start, copy_source_len

119

data, copy_source_start, copy_source_len

120

)

120

)

121

122

123

def slice_with_len(data, start, len):

123

def slice_with_len(data, start, len):

124

return data[start : start + len]

124

return data[start : start + len]

125

126

127

@attr.s

127

@attr.s

128

class Node:

128

class Node:

129

path = attr.ib()

129

path = attr.ib()

130

entry = attr.ib()

130

entry = attr.ib()

131

parent = attr.ib(default=None)

131

parent = attr.ib(default=None)

132

children_count = attr.ib(default=0)

132

children_count = attr.ib(default=0)

133

children_offset = attr.ib(default=0)

133

children_offset = attr.ib(default=0)

134

descendants_with_entry = attr.ib(default=0)

134

descendants_with_entry = attr.ib(default=0)

135

tracked_descendants = attr.ib(default=0)

135

tracked_descendants = attr.ib(default=0)

136

137

def pack(self, copy_map, paths_offset):

137

def pack(self, copy_map, paths_offset):

138

path = self.path

138

path = self.path

139

copy = copy_map.get(path)

139

copy = copy_map.get(path)

140

entry = self.entry

140

entry = self.entry

141

142

path_start = paths_offset

142

path_start = paths_offset

143

path_len = len(path)

143

path_len = len(path)

144

basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1

144

basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1

145

if copy is not None:

145

if copy is not None:

146

copy_source_start = paths_offset + len(path)

146

copy_source_start = paths_offset + len(path)

147

copy_source_len = len(copy)

147

copy_source_len = len(copy)

148

else:

148

else:

149

copy_source_start = 0

149

copy_source_start = 0

150

copy_source_len = 0

150

copy_source_len = 0

151

if entry is not None:

151

if entry is not None:

152

flags, size, mtime_s, mtime_ns = entry.v2_data()

152

flags, size, mtime_s, mtime_ns = entry.v2_data()

153

else:

153

else:

154

# There are no mtime-cached directories in the Python implementation

154

# There are no mtime-cached directories in the Python implementation

155

flags = DIRSTATE_V2_DIRECTORY

155

flags = DIRSTATE_V2_DIRECTORY

156

size = 0

156

size = 0

157

mtime_s = 0

157

mtime_s = 0

158

mtime_ns = 0

158

mtime_ns = 0

159

return NODE.pack(

159

return NODE.pack(

160

path_start,

160

path_start,

161

path_len,

161

path_len,

162

basename_start,

162

basename_start,

163

copy_source_start,

163

copy_source_start,

164

copy_source_len,

164

copy_source_len,

165

self.children_offset,

165

self.children_offset,

166

self.children_count,

166

self.children_count,

167

self.descendants_with_entry,

167

self.descendants_with_entry,

168

self.tracked_descendants,

168

self.tracked_descendants,

169

flags,

169

flags,

170

size,

170

size,

171

mtime_s,

171

mtime_s,

172

mtime_ns,

172

mtime_ns,

173

)

173

)

174

175

176

def pack_dirstate(map, copy_map):

176

def pack_dirstate(map, copy_map):

177

"""

177

"""

178

Pack `map` and `copy_map` into the dirstate v2 binary format and return

178

Pack `map` and `copy_map` into the dirstate v2 binary format and return

179

the bytearray.

179

the bytearray.

180

181

The on-disk format expects a tree-like structure where the leaves are

181

The on-disk format expects a tree-like structure where the leaves are

182

written first (and sorted per-directory), going up levels until the root

182

written first (and sorted per-directory), going up levels until the root

183

node and writing that one to the docket. See more details on the on-disk

183

node and writing that one to the docket. See more details on the on-disk

184

format in `mercurial/helptext/internals/dirstate-v2`.

184

format in `mercurial/helptext/internals/dirstate-v2`.

185

186

Since both `map` and `copy_map` are flat dicts we need to figure out the

186

Since both `map` and `copy_map` are flat dicts we need to figure out the

187

hierarchy. This algorithm does so without having to build the entire tree

187

hierarchy. This algorithm does so without having to build the entire tree

188

in-memory: it only keeps the minimum number of nodes around to satisfy the

188

in-memory: it only keeps the minimum number of nodes around to satisfy the

189

format.

189

format.

190

191

# Algorithm explanation

191

# Algorithm explanation

192

193

This explanation does not talk about the different counters for tracked

193

This explanation does not talk about the different counters for tracked

194

descendents and storing the copies, but that work is pretty simple once this

194

descendants and storing the copies, but that work is pretty simple once this

195

algorithm is in place.

195

algorithm is in place.

196

197

## Building a subtree

197

## Building a subtree

198

199

First, sort `map`: this makes it so the leaves of the tree are contiguous

199

First, sort `map`: this makes it so the leaves of the tree are contiguous

200

per directory (i.e. a/b/c and a/b/d will be next to each other in the list),

200

per directory (i.e. a/b/c and a/b/d will be next to each other in the list),

201

and enables us to use the ordering of folders to have a "cursor" of the

201

and enables us to use the ordering of folders to have a "cursor" of the

202

current folder we're in without ever going twice in the same branch of the

202

current folder we're in without ever going twice in the same branch of the

203

tree. The cursor is a node that remembers its parent and any information

203

tree. The cursor is a node that remembers its parent and any information

204

relevant to the format (see the `Node` class), building the relevant part

204

relevant to the format (see the `Node` class), building the relevant part

205

of the tree lazily.

205

of the tree lazily.

206

Then, for each file in `map`, move the cursor into the tree to the

206

Then, for each file in `map`, move the cursor into the tree to the

207

corresponding folder of the file: for example, if the very first file

207

corresponding folder of the file: for example, if the very first file

208

is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to

208

is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to

209

its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent

209

its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent

210

`Node["a"]`. These nodes are kept around in a stack.

210

`Node["a"]`. These nodes are kept around in a stack.

211

If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we

211

If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we

212

add it to the stack and keep looping with the same logic of creating the

212

add it to the stack and keep looping with the same logic of creating the

213

tree nodes as needed. If however the next file in `map` is *not* in the same

213

tree nodes as needed. If however the next file in `map` is *not* in the same

214

subtree ("a/other", if we're still in the "a/b" folder), then we know that

214

subtree ("a/other", if we're still in the "a/b" folder), then we know that

215

the subtree we're in is complete.

215

the subtree we're in is complete.

216

217

## Writing the subtree

217

## Writing the subtree

218

219

We have the entire subtree in the stack, so we start writing it to disk

219

We have the entire subtree in the stack, so we start writing it to disk

220

folder by folder. The way we write a folder is to pop the stack into a list

220

folder by folder. The way we write a folder is to pop the stack into a list

221

until the folder changes, revert this list of direct children (to satisfy

221

until the folder changes, revert this list of direct children (to satisfy

222

the format requirement that children be sorted). This process repeats until

222

the format requirement that children be sorted). This process repeats until

223

we hit the "other" subtree.

223

we hit the "other" subtree.

224

225

An example:

225

An example:

226

a

226

a

227

dir1/b

227

dir1/b

228

dir1/c

228

dir1/c

229

dir2/dir3/d

229

dir2/dir3/d

230

dir2/dir3/e

230

dir2/dir3/e

231

dir2/f

231

dir2/f

232

233

Would have us:

233

Would have us:

234

- add to the stack until "dir2/dir3/e"

234

- add to the stack until "dir2/dir3/e"

235

- realize that "dir2/f" is in a different subtree

235

- realize that "dir2/f" is in a different subtree

236

- pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and

236

- pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and

237

pack them since the next entry is "dir2/dir3"

237

pack them since the next entry is "dir2/dir3"

238

- go back up to "dir2"

238

- go back up to "dir2"

239

- add "dir2/f" to the stack

239

- add "dir2/f" to the stack

240

- realize we're done with the map

240

- realize we're done with the map

241

- pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them

241

- pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them

242

- go up to the root node, do the same to write "a", "dir1" and "dir2" in

242

- go up to the root node, do the same to write "a", "dir1" and "dir2" in

243

that order

243

that order

244

245

## Special case for the root node

245

## Special case for the root node

246

247

The root node is not serialized in the format, but its information is

247

The root node is not serialized in the format, but its information is

248

written to the docket. Again, see more details on the on-disk format in

248

written to the docket. Again, see more details on the on-disk format in

249

`mercurial/helptext/internals/dirstate-v2`.

249

`mercurial/helptext/internals/dirstate-v2`.

250

"""

250

"""

251

data = bytearray()

251

data = bytearray()

252

root_nodes_start = 0

252

root_nodes_start = 0

253

root_nodes_len = 0

253

root_nodes_len = 0

254

nodes_with_entry_count = 0

254

nodes_with_entry_count = 0

255

nodes_with_copy_source_count = 0

255

nodes_with_copy_source_count = 0

256

# Will always be 0 since this implementation always re-writes everything

256

# Will always be 0 since this implementation always re-writes everything

257

# to disk

257

# to disk

258

unreachable_bytes = 0

258

unreachable_bytes = 0

259

unused = b'\x00' * 4

259

unused = b'\x00' * 4

260

# This is an optimization that's only useful for the Rust implementation

260

# This is an optimization that's only useful for the Rust implementation

261

ignore_patterns_hash = b'\x00' * 20

261

ignore_patterns_hash = b'\x00' * 20

262

263

if len(map) == 0:

263

if len(map) == 0:

264

tree_metadata = TREE_METADATA.pack(

264

tree_metadata = TREE_METADATA.pack(

265

root_nodes_start,

265

root_nodes_start,

266

root_nodes_len,

266

root_nodes_len,

267

nodes_with_entry_count,

267

nodes_with_entry_count,

268

nodes_with_copy_source_count,

268

nodes_with_copy_source_count,

269

unreachable_bytes,

269

unreachable_bytes,

270

unused,

270

unused,

271

ignore_patterns_hash,

271

ignore_patterns_hash,

272

)

272

)

273

return data, tree_metadata

273

return data, tree_metadata

274

275

sorted_map = sorted(map.items(), key=lambda x: x[0])

275

sorted_map = sorted(map.items(), key=lambda x: x[0])

276

277

# Use a stack to ~~not~~ have to only remember the nodes we currently need

277

# Use a stack to have to only remember the nodes we currently need

278

# instead of building the entire tree in memory

278

# instead of building the entire tree in memory

279

stack = []

279

stack = []

280

current_node = Node(b"", None)

280

current_node = Node(b"", None)

281

stack.append(current_node)

281

stack.append(current_node)

282

283

for index, (path, entry) in enumerate(sorted_map, 1):

283

for index, (path, entry) in enumerate(sorted_map, 1):

284

nodes_with_entry_count += 1

284

nodes_with_entry_count += 1

285

if path in copy_map:

285

if path in copy_map:

286

nodes_with_copy_source_count += 1

286

nodes_with_copy_source_count += 1

287

current_folder = get_folder(path)

287

current_folder = get_folder(path)

288

current_node = move_to_correct_node_in_tree(

288

current_node = move_to_correct_node_in_tree(

289

current_folder, current_node, stack

289

current_folder, current_node, stack

290

)

290

)

291

292

current_node.children_count += 1

292

current_node.children_count += 1

293

# Entries from `map` are never `None`

293

# Entries from `map` are never `None`

294

if entry.tracked:

294

if entry.tracked:

295

current_node.tracked_descendants += 1

295

current_node.tracked_descendants += 1

296

current_node.descendants_with_entry += 1

296

current_node.descendants_with_entry += 1

297

stack.append(Node(path, entry, current_node))

297

stack.append(Node(path, entry, current_node))

298

299

should_pack = True

299

should_pack = True

300

next_path = None

300

next_path = None

301

if index < len(sorted_map):

301

if index < len(sorted_map):

302

# Determine if the next entry is in the same sub-tree, if so don't

302

# Determine if the next entry is in the same sub-tree, if so don't

303

# pack yet

303

# pack yet

304

next_path = sorted_map[index][0]

304

next_path = sorted_map[index][0]

305

should_pack = not is_ancestor(next_path, current_folder)

305

should_pack = not is_ancestor(next_path, current_folder)

306

if should_pack:

306

if should_pack:

307

pack_directory_children(current_node, copy_map, data, stack)

307

pack_directory_children(current_node, copy_map, data, stack)

308

while stack and current_node.path != b"":

308

while stack and current_node.path != b"":

309

# Go up the tree and write until we reach the folder of the next

309

# Go up the tree and write until we reach the folder of the next

310

# entry (if any, otherwise the root)

310

# entry (if any, otherwise the root)

311

parent = current_node.parent

311

parent = current_node.parent

312

in_ancestor_of_next_path = next_path is not None and (

312

in_ancestor_of_next_path = next_path is not None and (

313

is_ancestor(next_path, get_folder(stack[-1].path))

313

is_ancestor(next_path, get_folder(stack[-1].path))

314

)

314

)

315

if parent is None or in_ancestor_of_next_path:

315

if parent is None or in_ancestor_of_next_path:

316

break

316

break

317

pack_directory_children(parent, copy_map, data, stack)

317

pack_directory_children(parent, copy_map, data, stack)

318

current_node = parent

318

current_node = parent

319

320

# Special case for the root node since we don't write it to disk, only its

320

# Special case for the root node since we don't write it to disk, only its

321

# children to the docket

321

# children to the docket

322

current_node = stack.pop()

322

current_node = stack.pop()

323

assert current_node.path == b"", current_node.path

323

assert current_node.path == b"", current_node.path

324

assert len(stack) == 0, len(stack)

324

assert len(stack) == 0, len(stack)

325

326

tree_metadata = TREE_METADATA.pack(

326

tree_metadata = TREE_METADATA.pack(

327

current_node.children_offset,

327

current_node.children_offset,

328

current_node.children_count,

328

current_node.children_count,

329

nodes_with_entry_count,

329

nodes_with_entry_count,

330

nodes_with_copy_source_count,

330

nodes_with_copy_source_count,

331

unreachable_bytes,

331

unreachable_bytes,

332

unused,

332

unused,

333

ignore_patterns_hash,

333

ignore_patterns_hash,

334

)

334

)

335

336

return data, tree_metadata

336

return data, tree_metadata

337

338

339

def get_folder(path):

339

def get_folder(path):

340

"""

340

"""

341

Return the folder of the path that's given, an empty string for root paths.

341

Return the folder of the path that's given, an empty string for root paths.

342

"""

342

"""

343

return path.rsplit(b'/', 1)[0] if b'/' in path else b''

343

return path.rsplit(b'/', 1)[0] if b'/' in path else b''

344

345

346

def is_ancestor(path, maybe_ancestor):

346

def is_ancestor(path, maybe_ancestor):

347

"""Returns whether `maybe_ancestor` is an ancestor of `path`.

347

"""Returns whether `maybe_ancestor` is an ancestor of `path`.

348

349

>>> is_ancestor(b"a", b"")

349

>>> is_ancestor(b"a", b"")

350

True

350

True

351

>>> is_ancestor(b"a/b/c", b"a/b/c")

351

>>> is_ancestor(b"a/b/c", b"a/b/c")

352

False

352

False

353

>>> is_ancestor(b"hgext3rd/__init__.py", b"hgext")

353

>>> is_ancestor(b"hgext3rd/__init__.py", b"hgext")

354

False

354

False

355

>>> is_ancestor(b"hgext3rd/__init__.py", b"hgext3rd")

355

>>> is_ancestor(b"hgext3rd/__init__.py", b"hgext3rd")

356

True

356

True

357

"""

357

"""

358

if maybe_ancestor == b"":

358

if maybe_ancestor == b"":

359

return True

359

return True

360

if path <= maybe_ancestor:

360

if path <= maybe_ancestor:

361

return False

361

return False

362

path_components = path.split(b"/")

362

path_components = path.split(b"/")

363

ancestor_components = maybe_ancestor.split(b"/")

363

ancestor_components = maybe_ancestor.split(b"/")

364

return all(c == o for c, o in zip(path_components, ancestor_components))

364

return all(c == o for c, o in zip(path_components, ancestor_components))

365

366

367

def move_to_correct_node_in_tree(target_folder, current_node, stack):

367

def move_to_correct_node_in_tree(target_folder, current_node, stack):

368

"""

368

"""

369

Move inside the dirstate node tree to the node corresponding to

369

Move inside the dirstate node tree to the node corresponding to

370

`target_folder`, creating the missing nodes along the way if needed.

370

`target_folder`, creating the missing nodes along the way if needed.

371

"""

371

"""

372

while target_folder != current_node.path:

372

while target_folder != current_node.path:

373

if is_ancestor(target_folder, current_node.path):

373

if is_ancestor(target_folder, current_node.path):

374

# We need to go down a folder

374

# We need to go down a folder

375

prefix = target_folder[len(current_node.path) :].lstrip(b'/')

375

prefix = target_folder[len(current_node.path) :].lstrip(b'/')

376

subfolder_name = prefix.split(b'/', 1)[0]

376

subfolder_name = prefix.split(b'/', 1)[0]

377

if current_node.path:

377

if current_node.path:

378

subfolder_path = current_node.path + b'/' + subfolder_name

378

subfolder_path = current_node.path + b'/' + subfolder_name

379

else:

379

else:

380

subfolder_path = subfolder_name

380

subfolder_path = subfolder_name

381

next_node = stack[-1]

381

next_node = stack[-1]

382

if next_node.path == target_folder:

382

if next_node.path == target_folder:

383

# This folder is now a file and only contains removed entries

383

# This folder is now a file and only contains removed entries

384

# merge with the last node

384

# merge with the last node

385

current_node = next_node

385

current_node = next_node

386

else:

386

else:

387

current_node.children_count += 1

387

current_node.children_count += 1

388

current_node = Node(subfolder_path, None, current_node)

388

current_node = Node(subfolder_path, None, current_node)

389

stack.append(current_node)

389

stack.append(current_node)

390

else:

390

else:

391

# We need to go up a folder

391

# We need to go up a folder

392

current_node = current_node.parent

392

current_node = current_node.parent

393

return current_node

393

return current_node

394

395

396

def pack_directory_children(node, copy_map, data, stack):

396

def pack_directory_children(node, copy_map, data, stack):

397

"""

397

"""

398

Write the binary representation of the direct sorted children of `node` to

398

Write the binary representation of the direct sorted children of `node` to

399

`data`

399

`data`

400

"""

400

"""

401

direct_children = []

401

direct_children = []

402

403

while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:

403

while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:

404

direct_children.append(stack.pop())

404

direct_children.append(stack.pop())

405

if not direct_children:

405

if not direct_children:

406

raise error.ProgrammingError(b"no direct children for %r" % node.path)

406

raise error.ProgrammingError(b"no direct children for %r" % node.path)

407

408

# Reverse the stack to get the correct sorted order

408

# Reverse the stack to get the correct sorted order

409

direct_children.reverse()

409

direct_children.reverse()

410

packed_children = bytearray()

410

packed_children = bytearray()

411

# Write the paths to `data`. Pack child nodes but don't write them yet

411

# Write the paths to `data`. Pack child nodes but don't write them yet

412

for child in direct_children:

412

for child in direct_children:

413

packed = child.pack(copy_map=copy_map, paths_offset=len(data))

413

packed = child.pack(copy_map=copy_map, paths_offset=len(data))

414

packed_children.extend(packed)

414

packed_children.extend(packed)

415

data.extend(child.path)

415

data.extend(child.path)

416

data.extend(copy_map.get(child.path, b""))

416

data.extend(copy_map.get(child.path, b""))

417

node.tracked_descendants += child.tracked_descendants

417

node.tracked_descendants += child.tracked_descendants

418

node.descendants_with_entry += child.descendants_with_entry

418

node.descendants_with_entry += child.descendants_with_entry

419

# Write the fixed-size child nodes all together

419

# Write the fixed-size child nodes all together

420

node.children_offset = len(data)

420

node.children_offset = len(data)

421

data.extend(packed_children)

421

data.extend(packed_children)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # v2.py - Pure-Python implementation of the dirstate-v2 file format
             #
             # Copyright Mercurial Contributors
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import struct
             from ..thirdparty import attr
             from .. import error, policy
             parsers = policy.importmod('parsers')
             # Must match the constant of the same name in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`
             TREE_METADATA_SIZE = 44
             NODE_SIZE = 44
             # Must match the `TreeMetadata` Rust struct in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
             #
             # * 4 bytes: start offset of root nodes
             # * 4 bytes: number of root nodes
             # * 4 bytes: total number of nodes in the tree that have an entry
             # * 4 bytes: total number of nodes in the tree that have a copy source
             # * 4 bytes: number of bytes in the data file that are not used anymore
             # * 4 bytes: unused
             # * 20 bytes: SHA-1 hash of ignore patterns
             TREE_METADATA = struct.Struct('>LLLLL4s20s')
             # Must match the `Node` Rust struct in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
             #
             # * 4 bytes: start offset of full path
             # * 2 bytes: length of the full path
             # * 2 bytes: length within the full path before its "base name"
             # * 4 bytes: start offset of the copy source if any, or zero for no copy source
             # * 2 bytes: length of the copy source if any, or unused
             # * 4 bytes: start offset of child nodes
             # * 4 bytes: number of child nodes
             # * 4 bytes: number of descendant nodes that have an entry
             # * 4 bytes: number of descendant nodes that have a "tracked" state
             # * 1 byte: flags
             # * 4 bytes: expected size
             # * 4 bytes: mtime seconds
             # * 4 bytes: mtime nanoseconds
             NODE = struct.Struct('>LHHLHLLLLHlll')
             assert TREE_METADATA_SIZE == TREE_METADATA.size
             assert NODE_SIZE == NODE.size
             # match constant in mercurial/pure/parsers.py
             DIRSTATE_V2_DIRECTORY = 1 << 13
             def parse_dirstate(map, copy_map, data, tree_metadata):
-                """parse a full v2-dirstate from a binary data into dictionnaries:
+                """parse a full v2-dirstate from a binary data into dictionaries:
                 - map: a {path: entry} mapping that will be filled
                 - copy_map: a {path: copy-source} mapping that will be filled
                 - data: a binary blob contains v2 nodes data
                 - tree_metadata:: a binary blob of the top level node (from the docket)
                 """
                 (
                     root_nodes_start,
                     root_nodes_len,
                     _nodes_with_entry_count,
                     _nodes_with_copy_source_count,
                     _unreachable_bytes,
                     _unused,
                     _ignore_patterns_hash,
                 ) = TREE_METADATA.unpack(tree_metadata)
                 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
             def parse_nodes(map, copy_map, data, start, len):
                 """parse <len> nodes from <data> starting at offset <start>
                 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
                 All directory specific information is ignored and do not need any
                 processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
                 """
                 for i in range(len):
                     node_start = start + NODE_SIZE * i
                     node_bytes = slice_with_len(data, node_start, NODE_SIZE)
                     (
                         path_start,
                         path_len,
                         _basename_start,
                         copy_source_start,
                         copy_source_len,
                         children_start,
                         children_count,
                         _descendants_with_entry_count,
                         _tracked_descendants_count,
                         flags,
                         size,
                         mtime_s,
                         mtime_ns,
                     ) = NODE.unpack(node_bytes)
                     # Parse child nodes of this node recursively
                     parse_nodes(map, copy_map, data, children_start, children_count)
                     item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
                     if not item.any_tracked:
                         continue
                     path = slice_with_len(data, path_start, path_len)
                     map[path] = item
                     if copy_source_start:
                         copy_map[path] = slice_with_len(
                             data, copy_source_start, copy_source_len
                         )
             def slice_with_len(data, start, len):
                 return data[start : start + len]
             @attr.s
             class Node:
                 path = attr.ib()
                 entry = attr.ib()
                 parent = attr.ib(default=None)
                 children_count = attr.ib(default=0)
                 children_offset = attr.ib(default=0)
                 descendants_with_entry = attr.ib(default=0)
                 tracked_descendants = attr.ib(default=0)
                 def pack(self, copy_map, paths_offset):
                     path = self.path
                     copy = copy_map.get(path)
                     entry = self.entry
                     path_start = paths_offset
                     path_len = len(path)
                     basename_start = path.rfind(b'/') + 1  # 0 if rfind returns -1
                     if copy is not None:
                         copy_source_start = paths_offset + len(path)
                         copy_source_len = len(copy)
                     else:
                         copy_source_start = 0
                         copy_source_len = 0
                     if entry is not None:
                         flags, size, mtime_s, mtime_ns = entry.v2_data()
                     else:
                         # There are no mtime-cached directories in the Python implementation
                         flags = DIRSTATE_V2_DIRECTORY
                         size = 0
                         mtime_s = 0
                         mtime_ns = 0
                     return NODE.pack(
                         path_start,
                         path_len,
                         basename_start,
                         copy_source_start,
                         copy_source_len,
                         self.children_offset,
                         self.children_count,
                         self.descendants_with_entry,
                         self.tracked_descendants,
                         flags,
                         size,
                         mtime_s,
                         mtime_ns,
                     )
             def pack_dirstate(map, copy_map):
                 """
                 Pack `map` and `copy_map` into the dirstate v2 binary format and return
                 the bytearray.
                 The on-disk format expects a tree-like structure where the leaves are
                 written first (and sorted per-directory), going up levels until the root
                 node and writing that one to the docket. See more details on the on-disk
                 format in `mercurial/helptext/internals/dirstate-v2`.
                 Since both `map` and `copy_map` are flat dicts we need to figure out the
                 hierarchy. This algorithm does so without having to build the entire tree
                 in-memory: it only keeps the minimum number of nodes around to satisfy the
                 format.
                 # Algorithm explanation
                 This explanation does not talk about the different counters for tracked
-                descendents and storing the copies, but that work is pretty simple once this
+                descendants and storing the copies, but that work is pretty simple once this
                 algorithm is in place.
                 ## Building a subtree
                 First, sort `map`: this makes it so the leaves of the tree are contiguous
                 per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
                 and enables us to use the ordering of folders to have a "cursor" of the
                 current folder we're in without ever going twice in the same branch of the
                 tree. The cursor is a node that remembers its parent and any information
                 relevant to the format (see the `Node` class), building the relevant part
                 of the tree lazily.
                 Then, for each file in `map`, move the cursor into the tree to the
                 corresponding folder of the file: for example, if the very first file
                 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
                 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
                 `Node["a"]`. These nodes are kept around in a stack.
                 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
                 add it to the stack and keep looping with the same logic of creating the
                 tree nodes as needed. If however the next file in `map` is *not* in the same
                 subtree ("a/other", if we're still in the "a/b" folder), then we know that
                 the subtree we're in is complete.
                 ## Writing the subtree
                 We have the entire subtree in the stack, so we start writing it to disk
                 folder by folder. The way we write a folder is to pop the stack into a list
                 until the folder changes, revert this list of direct children (to satisfy
                 the format requirement that children be sorted). This process repeats until
                 we hit the "other" subtree.
                 An example:
                     a
                     dir1/b
                     dir1/c
                     dir2/dir3/d
                     dir2/dir3/e
                     dir2/f
                 Would have us:
                     - add to the stack until "dir2/dir3/e"
                     - realize that "dir2/f" is in a different subtree
                     - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
                       pack them since the next entry is "dir2/dir3"
                     - go back up to "dir2"
                     - add "dir2/f" to the stack
                     - realize we're done with the map
                     - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
                     - go up to the root node, do the same to write "a", "dir1" and "dir2" in
                       that order
                 ## Special case for the root node
                 The root node is not serialized in the format, but its information is
                 written to the docket. Again, see more details on the on-disk format in
                 `mercurial/helptext/internals/dirstate-v2`.
                 """
                 data = bytearray()
                 root_nodes_start = 0
                 root_nodes_len = 0
                 nodes_with_entry_count = 0
                 nodes_with_copy_source_count = 0
                 # Will always be 0 since this implementation always re-writes everything
                 # to disk
                 unreachable_bytes = 0
                 unused = b'\x00' * 4
                 # This is an optimization that's only useful for the Rust implementation
                 ignore_patterns_hash = b'\x00' * 20
                 if len(map) == 0:
                     tree_metadata = TREE_METADATA.pack(
                         root_nodes_start,
                         root_nodes_len,
                         nodes_with_entry_count,
                         nodes_with_copy_source_count,
                         unreachable_bytes,
                         unused,
                         ignore_patterns_hash,
                     )
                     return data, tree_metadata
                 sorted_map = sorted(map.items(), key=lambda x: x[0])
-                # Use a stack to not have to only remember the nodes we currently need
+                # Use a stack to have to only remember the nodes we currently need
                 # instead of building the entire tree in memory
                 stack = []
                 current_node = Node(b"", None)
                 stack.append(current_node)
                 for index, (path, entry) in enumerate(sorted_map, 1):
                     nodes_with_entry_count += 1
                     if path in copy_map:
                         nodes_with_copy_source_count += 1
                     current_folder = get_folder(path)
                     current_node = move_to_correct_node_in_tree(
                         current_folder, current_node, stack
                     )
                     current_node.children_count += 1
                     # Entries from `map` are never `None`
                     if entry.tracked:
                         current_node.tracked_descendants += 1
                     current_node.descendants_with_entry += 1
                     stack.append(Node(path, entry, current_node))
                     should_pack = True
                     next_path = None
                     if index < len(sorted_map):
                         # Determine if the next entry is in the same sub-tree, if so don't
                         # pack yet
                         next_path = sorted_map[index][0]
                         should_pack = not is_ancestor(next_path, current_folder)
                     if should_pack:
                         pack_directory_children(current_node, copy_map, data, stack)
                         while stack and current_node.path != b"":
                             # Go up the tree and write until we reach the folder of the next
                             # entry (if any, otherwise the root)
                             parent = current_node.parent
                             in_ancestor_of_next_path = next_path is not None and (
                                 is_ancestor(next_path, get_folder(stack[-1].path))
                             )
                             if parent is None or in_ancestor_of_next_path:
                                 break
                             pack_directory_children(parent, copy_map, data, stack)
                             current_node = parent
                 # Special case for the root node since we don't write it to disk, only its
                 # children to the docket
                 current_node = stack.pop()
                 assert current_node.path == b"", current_node.path
                 assert len(stack) == 0, len(stack)
                 tree_metadata = TREE_METADATA.pack(
                     current_node.children_offset,
                     current_node.children_count,
                     nodes_with_entry_count,
                     nodes_with_copy_source_count,
                     unreachable_bytes,
                     unused,
                     ignore_patterns_hash,
                 )
                 return data, tree_metadata
             def get_folder(path):
                 """
                 Return the folder of the path that's given, an empty string for root paths.
                 """
                 return path.rsplit(b'/', 1)[0] if b'/' in path else b''
             def is_ancestor(path, maybe_ancestor):
                 """Returns whether `maybe_ancestor` is an ancestor of `path`.
                 >>> is_ancestor(b"a", b"")
                 True
                 >>> is_ancestor(b"a/b/c", b"a/b/c")
                 False
                 >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext")
                 False
                 >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext3rd")
                 True
                 """
                 if maybe_ancestor == b"":
                     return True
                 if path <= maybe_ancestor:
                     return False
                 path_components = path.split(b"/")
                 ancestor_components = maybe_ancestor.split(b"/")
                 return all(c == o for c, o in zip(path_components, ancestor_components))
             def move_to_correct_node_in_tree(target_folder, current_node, stack):
                 """
                 Move inside the dirstate node tree to the node corresponding to
                 `target_folder`, creating the missing nodes along the way if needed.
                 """
                 while target_folder != current_node.path:
                     if is_ancestor(target_folder, current_node.path):
                         # We need to go down a folder
                         prefix = target_folder[len(current_node.path) :].lstrip(b'/')
                         subfolder_name = prefix.split(b'/', 1)[0]
                         if current_node.path:
                             subfolder_path = current_node.path + b'/' + subfolder_name
                         else:
                             subfolder_path = subfolder_name
                         next_node = stack[-1]
                         if next_node.path == target_folder:
                             # This folder is now a file and only contains removed entries
                             # merge with the last node
                             current_node = next_node
                         else:
                             current_node.children_count += 1
                             current_node = Node(subfolder_path, None, current_node)
                             stack.append(current_node)
                     else:
                         # We need to go up a folder
                         current_node = current_node.parent
                 return current_node
             def pack_directory_children(node, copy_map, data, stack):
                 """
                 Write the binary representation of the direct sorted children of `node` to
                 `data`
                 """
                 direct_children = []
                 while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
                     direct_children.append(stack.pop())
                 if not direct_children:
                     raise error.ProgrammingError(b"no direct children for %r" % node.path)
                 # Reverse the stack to get the correct sorted order
                 direct_children.reverse()
                 packed_children = bytearray()
                 # Write the paths to `data`. Pack child nodes but don't write them yet
                 for child in direct_children:
                     packed = child.pack(copy_map=copy_map, paths_offset=len(data))
                     packed_children.extend(packed)
                     data.extend(child.path)
                     data.extend(copy_map.get(child.path, b""))
                     node.tracked_descendants += child.tracked_descendants
                     node.descendants_with_entry += child.descendants_with_entry
                 # Write the fixed-size child nodes all together
                 node.children_offset = len(data)
                 data.extend(packed_children)