upstream/mercurial-mirror Commit - r49042:50dca3aa

1

# v2.py - Pure-Python implementation of the dirstate-v2 file format

1

# v2.py - Pure-Python implementation of the dirstate-v2 file format

2

#

2

#

3

# Copyright Mercurial Contributors

3

# Copyright Mercurial Contributors

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import

8

from __future__ import absolute_import

9

10

import struct

10

import struct

11

12

from ..thirdparty import attr

12

from ..thirdparty import attr

13

from .. import error, policy

13

from .. import error, policy

14

15

parsers = policy.importmod('parsers')

15

parsers = policy.importmod('parsers')

16

17

18

# Must match the constant of the same name in

18

# Must match the constant of the same name in

19

# `rust/hg-core/src/dirstate_tree/on_disk.rs`

19

# `rust/hg-core/src/dirstate_tree/on_disk.rs`

20

TREE_METADATA_SIZE = 44

20

TREE_METADATA_SIZE = 44

21

NODE_SIZE = 43

21

NODE_SIZE = 43

22

23

24

# Must match the `TreeMetadata` Rust struct in

24

# Must match the `TreeMetadata` Rust struct in

25

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

25

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

26

#

26

#

27

# * 4 bytes: start offset of root nodes

27

# * 4 bytes: start offset of root nodes

28

# * 4 bytes: number of root nodes

28

# * 4 bytes: number of root nodes

29

# * 4 bytes: total number of nodes in the tree that have an entry

29

# * 4 bytes: total number of nodes in the tree that have an entry

30

# * 4 bytes: total number of nodes in the tree that have a copy source

30

# * 4 bytes: total number of nodes in the tree that have a copy source

31

# * 4 bytes: number of bytes in the data file that are not used anymore

31

# * 4 bytes: number of bytes in the data file that are not used anymore

32

# * 4 bytes: unused

32

# * 4 bytes: unused

33

# * 20 bytes: SHA-1 hash of ignore patterns

33

# * 20 bytes: SHA-1 hash of ignore patterns

34

TREE_METADATA = struct.Struct('>LLLLL4s20s')

34

TREE_METADATA = struct.Struct('>LLLLL4s20s')

35

36

37

# Must match the `Node` Rust struct in

37

# Must match the `Node` Rust struct in

38

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

38

# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.

39

#

39

#

40

# * 4 bytes: start offset of full path

40

# * 4 bytes: start offset of full path

41

# * 2 bytes: length of the full path

41

# * 2 bytes: length of the full path

42

# * 2 bytes: length within the full path before its "base name"

42

# * 2 bytes: length within the full path before its "base name"

43

# * 4 bytes: start offset of the copy source if any, or zero for no copy source

43

# * 4 bytes: start offset of the copy source if any, or zero for no copy source

44

# * 2 bytes: length of the copy source if any, or unused

44

# * 2 bytes: length of the copy source if any, or unused

45

# * 4 bytes: start offset of child nodes

45

# * 4 bytes: start offset of child nodes

46

# * 4 bytes: number of child nodes

46

# * 4 bytes: number of child nodes

47

# * 4 bytes: number of descendant nodes that have an entry

47

# * 4 bytes: number of descendant nodes that have an entry

48

# * 4 bytes: number of descendant nodes that have a "tracked" state

48

# * 4 bytes: number of descendant nodes that have a "tracked" state

49

# * 1 byte: flags

49

# * 1 byte: flags

50

# * 4 bytes: expected size

50

# * 4 bytes: expected size

51

# * 4 bytes: mtime seconds

51

# * 4 bytes: mtime seconds

52

# * 4 bytes: mtime nanoseconds

52

# * 4 bytes: mtime nanoseconds

53

NODE = struct.Struct('>LHHLHLLLLBlll')

53

NODE = struct.Struct('>LHHLHLLLLBlll')

54

55

56

assert TREE_METADATA_SIZE == TREE_METADATA.size

56

assert TREE_METADATA_SIZE == TREE_METADATA.size

57

assert NODE_SIZE == NODE.size

57

assert NODE_SIZE == NODE.size

58

59

60

def parse_dirstate(map, copy_map, data, tree_metadata):

60

def parse_dirstate(map, copy_map, data, tree_metadata):

61

"""parse a full v2-dirstate from a binary data into dictionnaries:

61

"""parse a full v2-dirstate from a binary data into dictionnaries:

62

63

- map: a {path: entry} mapping that will be filled

63

- map: a {path: entry} mapping that will be filled

64

- copy_map: a {path: copy-source} mapping that will be filled

64

- copy_map: a {path: copy-source} mapping that will be filled

65

- data: a binary blob contains v2 nodes data

65

- data: a binary blob contains v2 nodes data

66

- tree_metadata:: a binary blob of the top level node (from the docket)

66

- tree_metadata:: a binary blob of the top level node (from the docket)

67

"""

67

"""

68

(

68

(

69

root_nodes_start,

69

root_nodes_start,

70

root_nodes_len,

70

root_nodes_len,

71

_nodes_with_entry_count,

71

_nodes_with_entry_count,

72

_nodes_with_copy_source_count,

72

_nodes_with_copy_source_count,

73

_unreachable_bytes,

73

_unreachable_bytes,

74

_unused,

74

_unused,

75

_ignore_patterns_hash,

75

_ignore_patterns_hash,

76

) = TREE_METADATA.unpack(tree_metadata)

76

) = TREE_METADATA.unpack(tree_metadata)

77

parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)

77

parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)

78

79

80

def parse_nodes(map, copy_map, data, start, len):

80

def parse_nodes(map, copy_map, data, start, len):

81

"""parse <len> nodes from <data> starting at offset <start>

81

"""parse <len> nodes from <data> starting at offset <start>

82

83

This is used by parse_dirstate to recursively fill `map` and `copy_map`.

83

This is used by parse_dirstate to recursively fill `map` and `copy_map`.

84

"""

84

"""

85

for i in range(len):

85

for i in range(len):

86

node_start = start + NODE_SIZE * i

86

node_start = start + NODE_SIZE * i

87

node_bytes = slice_with_len(data, node_start, NODE_SIZE)

87

node_bytes = slice_with_len(data, node_start, NODE_SIZE)

88

(

88

(

89

path_start,

89

path_start,

90

path_len,

90

path_len,

91

_basename_start,

91

_basename_start,

92

copy_source_start,

92

copy_source_start,

93

copy_source_len,

93

copy_source_len,

94

children_start,

94

children_start,

95

children_count,

95

children_count,

96

_descendants_with_entry_count,

96

_descendants_with_entry_count,

97

_tracked_descendants_count,

97

_tracked_descendants_count,

98

flags,

98

flags,

99

size,

99

size,

100

mtime_s,

100

mtime_s,

101

_mtime_ns,

101

_mtime_ns,

102

) = NODE.unpack(node_bytes)

102

) = NODE.unpack(node_bytes)

103

104

# Parse child nodes of this node recursively

104

# Parse child nodes of this node recursively

105

parse_nodes(map, copy_map, data, children_start, children_count)

105

parse_nodes(map, copy_map, data, children_start, children_count)

106

107

item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)

107

item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)

108

if not item.any_tracked:

108

if not item.any_tracked:

109

continue

109

continue

110

path = slice_with_len(data, path_start, path_len)

110

path = slice_with_len(data, path_start, path_len)

111

map[path] = item

111

map[path] = item

112

if copy_source_start:

112

if copy_source_start:

113

copy_map[path] = slice_with_len(

113

copy_map[path] = slice_with_len(

114

data, copy_source_start, copy_source_len

114

data, copy_source_start, copy_source_len

115

)

115

)

116

117

118

def slice_with_len(data, start, len):

118

def slice_with_len(data, start, len):

119

return data[start : start + len]

119

return data[start : start + len]

120

121

122

@attr.s

122

@attr.s

123

class Node(object):

123

class Node(object):

124

path = attr.ib()

124

path = attr.ib()

125

entry = attr.ib()

125

entry = attr.ib()

126

parent = attr.ib(default=None)

126

parent = attr.ib(default=None)

127

children_count = attr.ib(default=0)

127

children_count = attr.ib(default=0)

128

children_offset = attr.ib(default=0)

128

children_offset = attr.ib(default=0)

129

descendants_with_entry = attr.ib(default=0)

129

descendants_with_entry = attr.ib(default=0)

130

tracked_descendants = attr.ib(default=0)

130

tracked_descendants = attr.ib(default=0)

131

132

def pack(self, copy_map, paths_offset):

132

def pack(self, copy_map, paths_offset):

133

path = self.path

133

path = self.path

134

copy = copy_map.get(path)

134

copy = copy_map.get(path)

135

entry = self.entry

135

entry = self.entry

136

137

path_start = paths_offset

137

path_start = paths_offset

138

path_len = len(path)

138

path_len = len(path)

139

basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1

139

basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1

140

if copy is not None:

140

if copy is not None:

141

copy_source_start = paths_offset + len(path)

141

copy_source_start = paths_offset + len(path)

142

copy_source_len = len(copy)

142

copy_source_len = len(copy)

143

else:

143

else:

144

copy_source_start = 0

144

copy_source_start = 0

145

copy_source_len = 0

145

copy_source_len = 0

146

if entry is not None:

146

if entry is not None:

147

flags, size, mtime_s = entry.v2_data()

147

flags, size, mtime_s = entry.v2_data()

148

mtime_ns = 0

148

mtime_ns = 0

149

else:

149

else:

150

# There are no mtime-cached directories in the Python implementation

150

# There are no mtime-cached directories in the Python implementation

151

flags = 0

151

flags = 0

152

mode = 0

153

size = 0

152

size = 0

154

mtime_s = 0

153

mtime_s = 0

155

mtime_ns = 0

154

mtime_ns = 0

156

return NODE.pack(

155

return NODE.pack(

157

path_start,

156

path_start,

158

path_len,

157

path_len,

159

basename_start,

158

basename_start,

160

copy_source_start,

159

copy_source_start,

161

copy_source_len,

160

copy_source_len,

162

self.children_offset,

161

self.children_offset,

163

self.children_count,

162

self.children_count,

164

self.descendants_with_entry,

163

self.descendants_with_entry,

165

self.tracked_descendants,

164

self.tracked_descendants,

166

flags,

165

flags,

167

size,

166

size,

168

mtime_s,

167

mtime_s,

169

mtime_ns,

168

mtime_ns,

170

)

169

)

171

170

172

171

173

def pack_dirstate(map, copy_map, now):

172

def pack_dirstate(map, copy_map, now):

174

"""

173

"""

175

Pack `map` and `copy_map` into the dirstate v2 binary format and return

174

Pack `map` and `copy_map` into the dirstate v2 binary format and return

176

the bytearray.

175

the bytearray.

177

`now` is a timestamp of the current filesystem time used to detect race

176

`now` is a timestamp of the current filesystem time used to detect race

178

conditions in writing the dirstate to disk, see inline comment.

177

conditions in writing the dirstate to disk, see inline comment.

179

178

180

The on-disk format expects a tree-like structure where the leaves are

179

The on-disk format expects a tree-like structure where the leaves are

181

written first (and sorted per-directory), going up levels until the root

180

written first (and sorted per-directory), going up levels until the root

182

node and writing that one to the docket. See more details on the on-disk

181

node and writing that one to the docket. See more details on the on-disk

183

format in `mercurial/helptext/internals/dirstate-v2`.

182

format in `mercurial/helptext/internals/dirstate-v2`.

184

183

185

Since both `map` and `copy_map` are flat dicts we need to figure out the

184

Since both `map` and `copy_map` are flat dicts we need to figure out the

186

hierarchy. This algorithm does so without having to build the entire tree

185

hierarchy. This algorithm does so without having to build the entire tree

187

in-memory: it only keeps the minimum number of nodes around to satisfy the

186

in-memory: it only keeps the minimum number of nodes around to satisfy the

188

format.

187

format.

189

188

190

# Algorithm explanation

189

# Algorithm explanation

191

190

192

This explanation does not talk about the different counters for tracked

191

This explanation does not talk about the different counters for tracked

193

descendents and storing the copies, but that work is pretty simple once this

192

descendents and storing the copies, but that work is pretty simple once this

194

algorithm is in place.

193

algorithm is in place.

195

194

196

## Building a subtree

195

## Building a subtree

197

196

198

First, sort `map`: this makes it so the leaves of the tree are contiguous

197

First, sort `map`: this makes it so the leaves of the tree are contiguous

199

per directory (i.e. a/b/c and a/b/d will be next to each other in the list),

198

per directory (i.e. a/b/c and a/b/d will be next to each other in the list),

200

and enables us to use the ordering of folders to have a "cursor" of the

199

and enables us to use the ordering of folders to have a "cursor" of the

201

current folder we're in without ever going twice in the same branch of the

200

current folder we're in without ever going twice in the same branch of the

202

tree. The cursor is a node that remembers its parent and any information

201

tree. The cursor is a node that remembers its parent and any information

203

relevant to the format (see the `Node` class), building the relevant part

202

relevant to the format (see the `Node` class), building the relevant part

204

of the tree lazily.

203

of the tree lazily.

205

Then, for each file in `map`, move the cursor into the tree to the

204

Then, for each file in `map`, move the cursor into the tree to the

206

corresponding folder of the file: for example, if the very first file

205

corresponding folder of the file: for example, if the very first file

207

is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to

206

is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to

208

its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent

207

its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent

209

`Node["a"]`. These nodes are kept around in a stack.

208

`Node["a"]`. These nodes are kept around in a stack.

210

If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we

209

If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we

211

add it to the stack and keep looping with the same logic of creating the

210

add it to the stack and keep looping with the same logic of creating the

212

tree nodes as needed. If however the next file in `map` is *not* in the same

211

tree nodes as needed. If however the next file in `map` is *not* in the same

213

subtree ("a/other", if we're still in the "a/b" folder), then we know that

212

subtree ("a/other", if we're still in the "a/b" folder), then we know that

214

the subtree we're in is complete.

213

the subtree we're in is complete.

215

214

216

## Writing the subtree

215

## Writing the subtree

217

216

218

We have the entire subtree in the stack, so we start writing it to disk

217

We have the entire subtree in the stack, so we start writing it to disk

219

folder by folder. The way we write a folder is to pop the stack into a list

218

folder by folder. The way we write a folder is to pop the stack into a list

220

until the folder changes, revert this list of direct children (to satisfy

219

until the folder changes, revert this list of direct children (to satisfy

221

the format requirement that children be sorted). This process repeats until

220

the format requirement that children be sorted). This process repeats until

222

we hit the "other" subtree.

221

we hit the "other" subtree.

223

222

224

An example:

223

An example:

225

a

224

a

226

dir1/b

225

dir1/b

227

dir1/c

226

dir1/c

228

dir2/dir3/d

227

dir2/dir3/d

229

dir2/dir3/e

228

dir2/dir3/e

230

dir2/f

229

dir2/f

231

230

232

Would have us:

231

Would have us:

233

- add to the stack until "dir2/dir3/e"

232

- add to the stack until "dir2/dir3/e"

234

- realize that "dir2/f" is in a different subtree

233

- realize that "dir2/f" is in a different subtree

235

- pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and

234

- pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and

236

pack them since the next entry is "dir2/dir3"

235

pack them since the next entry is "dir2/dir3"

237

- go back up to "dir2"

236

- go back up to "dir2"

238

- add "dir2/f" to the stack

237

- add "dir2/f" to the stack

239

- realize we're done with the map

238

- realize we're done with the map

240

- pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them

239

- pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them

241

- go up to the root node, do the same to write "a", "dir1" and "dir2" in

240

- go up to the root node, do the same to write "a", "dir1" and "dir2" in

242

that order

241

that order

243

242

244

## Special case for the root node

243

## Special case for the root node

245

244

246

The root node is not serialized in the format, but its information is

245

The root node is not serialized in the format, but its information is

247

written to the docket. Again, see more details on the on-disk format in

246

written to the docket. Again, see more details on the on-disk format in

248

`mercurial/helptext/internals/dirstate-v2`.

247

`mercurial/helptext/internals/dirstate-v2`.

249

"""

248

"""

250

now = int(now)

249

now = int(now)

251

data = bytearray()

250

data = bytearray()

252

root_nodes_start = 0

251

root_nodes_start = 0

253

root_nodes_len = 0

252

root_nodes_len = 0

254

nodes_with_entry_count = 0

253

nodes_with_entry_count = 0

255

nodes_with_copy_source_count = 0

254

nodes_with_copy_source_count = 0

256

# Will always be 0 since this implementation always re-writes everything

255

# Will always be 0 since this implementation always re-writes everything

257

# to disk

256

# to disk

258

unreachable_bytes = 0

257

unreachable_bytes = 0

259

unused = b'\x00' * 4

258

unused = b'\x00' * 4

260

# This is an optimization that's only useful for the Rust implementation

259

# This is an optimization that's only useful for the Rust implementation

261

ignore_patterns_hash = b'\x00' * 20

260

ignore_patterns_hash = b'\x00' * 20

262

261

263

if len(map) == 0:

262

if len(map) == 0:

264

tree_metadata = TREE_METADATA.pack(

263

tree_metadata = TREE_METADATA.pack(

265

root_nodes_start,

264

root_nodes_start,

266

root_nodes_len,

265

root_nodes_len,

267

nodes_with_entry_count,

266

nodes_with_entry_count,

268

nodes_with_copy_source_count,

267

nodes_with_copy_source_count,

269

unreachable_bytes,

268

unreachable_bytes,

270

unused,

269

unused,

271

ignore_patterns_hash,

270

ignore_patterns_hash,

272

)

271

)

273

return data, tree_metadata

272

return data, tree_metadata

274

273

275

sorted_map = sorted(map.items(), key=lambda x: x[0])

274

sorted_map = sorted(map.items(), key=lambda x: x[0])

276

275

277

# Use a stack to not have to only remember the nodes we currently need

276

# Use a stack to not have to only remember the nodes we currently need

278

# instead of building the entire tree in memory

277

# instead of building the entire tree in memory

279

stack = []

278

stack = []

280

current_node = Node(b"", None)

279

current_node = Node(b"", None)

281

stack.append(current_node)

280

stack.append(current_node)

282

281

283

for index, (path, entry) in enumerate(sorted_map, 1):

282

for index, (path, entry) in enumerate(sorted_map, 1):

284

if entry.need_delay(now):

283

if entry.need_delay(now):

285

# The file was last modified "simultaneously" with the current

284

# The file was last modified "simultaneously" with the current

286

# write to dirstate (i.e. within the same second for file-

285

# write to dirstate (i.e. within the same second for file-

287

# systems with a granularity of 1 sec). This commonly happens

286

# systems with a granularity of 1 sec). This commonly happens

288

# for at least a couple of files on 'update'.

287

# for at least a couple of files on 'update'.

289

# The user could change the file without changing its size

288

# The user could change the file without changing its size

290

# within the same second. Invalidate the file's mtime in

289

# within the same second. Invalidate the file's mtime in

291

# dirstate, forcing future 'status' calls to compare the

290

# dirstate, forcing future 'status' calls to compare the

292

# contents of the file if the size is the same. This prevents

291

# contents of the file if the size is the same. This prevents

293

# mistakenly treating such files as clean.

292

# mistakenly treating such files as clean.

294

entry.set_possibly_dirty()

293

entry.set_possibly_dirty()

295

nodes_with_entry_count += 1

294

nodes_with_entry_count += 1

296

if path in copy_map:

295

if path in copy_map:

297

nodes_with_copy_source_count += 1

296

nodes_with_copy_source_count += 1

298

current_folder = get_folder(path)

297

current_folder = get_folder(path)

299

current_node = move_to_correct_node_in_tree(

298

current_node = move_to_correct_node_in_tree(

300

current_folder, current_node, stack

299

current_folder, current_node, stack

301

)

300

)

302

301

303

current_node.children_count += 1

302

current_node.children_count += 1

304

# Entries from `map` are never `None`

303

# Entries from `map` are never `None`

305

if entry.tracked:

304

if entry.tracked:

306

current_node.tracked_descendants += 1

305

current_node.tracked_descendants += 1

307

current_node.descendants_with_entry += 1

306

current_node.descendants_with_entry += 1

308

stack.append(Node(path, entry, current_node))

307

stack.append(Node(path, entry, current_node))

309

308

310

should_pack = True

309

should_pack = True

311

next_path = None

310

next_path = None

312

if index < len(sorted_map):

311

if index < len(sorted_map):

313

# Determine if the next entry is in the same sub-tree, if so don't

312

# Determine if the next entry is in the same sub-tree, if so don't

314

# pack yet

313

# pack yet

315

next_path = sorted_map[index][0]

314

next_path = sorted_map[index][0]

316

should_pack = not get_folder(next_path).startswith(current_folder)

315

should_pack = not get_folder(next_path).startswith(current_folder)

317

if should_pack:

316

if should_pack:

318

pack_directory_children(current_node, copy_map, data, stack)

317

pack_directory_children(current_node, copy_map, data, stack)

319

while stack and current_node.path != b"":

318

while stack and current_node.path != b"":

320

# Go up the tree and write until we reach the folder of the next

319

# Go up the tree and write until we reach the folder of the next

321

# entry (if any, otherwise the root)

320

# entry (if any, otherwise the root)

322

parent = current_node.parent

321

parent = current_node.parent

323

in_parent_folder_of_next_entry = next_path is not None and (

322

in_parent_folder_of_next_entry = next_path is not None and (

324

get_folder(next_path).startswith(get_folder(stack[-1].path))

323

get_folder(next_path).startswith(get_folder(stack[-1].path))

325

)

324

)

326

if parent is None or in_parent_folder_of_next_entry:

325

if parent is None or in_parent_folder_of_next_entry:

327

break

326

break

328

pack_directory_children(parent, copy_map, data, stack)

327

pack_directory_children(parent, copy_map, data, stack)

329

current_node = parent

328

current_node = parent

330

329

331

# Special case for the root node since we don't write it to disk, only its

330

# Special case for the root node since we don't write it to disk, only its

332

# children to the docket

331

# children to the docket

333

current_node = stack.pop()

332

current_node = stack.pop()

334

assert current_node.path == b"", current_node.path

333

assert current_node.path == b"", current_node.path

335

assert len(stack) == 0, len(stack)

334

assert len(stack) == 0, len(stack)

336

335

337

tree_metadata = TREE_METADATA.pack(

336

tree_metadata = TREE_METADATA.pack(

338

current_node.children_offset,

337

current_node.children_offset,

339

current_node.children_count,

338

current_node.children_count,

340

nodes_with_entry_count,

339

nodes_with_entry_count,

341

nodes_with_copy_source_count,

340

nodes_with_copy_source_count,

342

unreachable_bytes,

341

unreachable_bytes,

343

unused,

342

unused,

344

ignore_patterns_hash,

343

ignore_patterns_hash,

345

)

344

)

346

345

347

return data, tree_metadata

346

return data, tree_metadata

348

347

349

348

350

def get_folder(path):

349

def get_folder(path):

351

"""

350

"""

352

Return the folder of the path that's given, an empty string for root paths.

351

Return the folder of the path that's given, an empty string for root paths.

353

"""

352

"""

354

return path.rsplit(b'/', 1)[0] if b'/' in path else b''

353

return path.rsplit(b'/', 1)[0] if b'/' in path else b''

355

354

356

355

357

def move_to_correct_node_in_tree(target_folder, current_node, stack):

356

def move_to_correct_node_in_tree(target_folder, current_node, stack):

358

"""

357

"""

359

Move inside the dirstate node tree to the node corresponding to

358

Move inside the dirstate node tree to the node corresponding to

360

`target_folder`, creating the missing nodes along the way if needed.

359

`target_folder`, creating the missing nodes along the way if needed.

361

"""

360

"""

362

while target_folder != current_node.path:

361

while target_folder != current_node.path:

363

if target_folder.startswith(current_node.path):

362

if target_folder.startswith(current_node.path):

364

# We need to go down a folder

363

# We need to go down a folder

365

prefix = target_folder[len(current_node.path) :].lstrip(b'/')

364

prefix = target_folder[len(current_node.path) :].lstrip(b'/')

366

subfolder_name = prefix.split(b'/', 1)[0]

365

subfolder_name = prefix.split(b'/', 1)[0]

367

if current_node.path:

366

if current_node.path:

368

subfolder_path = current_node.path + b'/' + subfolder_name

367

subfolder_path = current_node.path + b'/' + subfolder_name

369

else:

368

else:

370

subfolder_path = subfolder_name

369

subfolder_path = subfolder_name

371

next_node = stack[-1]

370

next_node = stack[-1]

372

if next_node.path == target_folder:

371

if next_node.path == target_folder:

373

# This folder is now a file and only contains removed entries

372

# This folder is now a file and only contains removed entries

374

# merge with the last node

373

# merge with the last node

375

current_node = next_node

374

current_node = next_node

376

else:

375

else:

377

current_node.children_count += 1

376

current_node.children_count += 1

378

current_node = Node(subfolder_path, None, current_node)

377

current_node = Node(subfolder_path, None, current_node)

379

stack.append(current_node)

378

stack.append(current_node)

380

else:

379

else:

381

# We need to go up a folder

380

# We need to go up a folder

382

current_node = current_node.parent

381

current_node = current_node.parent

383

return current_node

382

return current_node

384

383

385

384

386

def pack_directory_children(node, copy_map, data, stack):

385

def pack_directory_children(node, copy_map, data, stack):

387

"""

386

"""

388

Write the binary representation of the direct sorted children of `node` to

387

Write the binary representation of the direct sorted children of `node` to

389

`data`

388

`data`

390

"""

389

"""

391

direct_children = []

390

direct_children = []

392

391

393

while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:

392

while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:

394

direct_children.append(stack.pop())

393

direct_children.append(stack.pop())

395

if not direct_children:

394

if not direct_children:

396

raise error.ProgrammingError(b"no direct children for %r" % node.path)

395

raise error.ProgrammingError(b"no direct children for %r" % node.path)

397

396

398

# Reverse the stack to get the correct sorted order

397

# Reverse the stack to get the correct sorted order

399

direct_children.reverse()

398

direct_children.reverse()

400

packed_children = bytearray()

399

packed_children = bytearray()

401

# Write the paths to `data`. Pack child nodes but don't write them yet

400

# Write the paths to `data`. Pack child nodes but don't write them yet

402

for child in direct_children:

401

for child in direct_children:

403

packed = child.pack(copy_map=copy_map, paths_offset=len(data))

402

packed = child.pack(copy_map=copy_map, paths_offset=len(data))

404

packed_children.extend(packed)

403

packed_children.extend(packed)

405

data.extend(child.path)

404

data.extend(child.path)

406

data.extend(copy_map.get(child.path, b""))

405

data.extend(copy_map.get(child.path, b""))

407

node.tracked_descendants += child.tracked_descendants

406

node.tracked_descendants += child.tracked_descendants

408

node.descendants_with_entry += child.descendants_with_entry

407

node.descendants_with_entry += child.descendants_with_entry

409

# Write the fixed-size child nodes all together

408

# Write the fixed-size child nodes all together

410

node.children_offset = len(data)

409

node.children_offset = len(data)

411

data.extend(packed_children)

410

data.extend(packed_children)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # v2.py - Pure-Python implementation of the dirstate-v2 file format
             #
             # Copyright Mercurial Contributors
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import struct
             from ..thirdparty import attr
             from .. import error, policy
             parsers = policy.importmod('parsers')
             # Must match the constant of the same name in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`
             TREE_METADATA_SIZE = 44
             NODE_SIZE = 43
             # Must match the `TreeMetadata` Rust struct in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
             #
             # * 4 bytes: start offset of root nodes
             # * 4 bytes: number of root nodes
             # * 4 bytes: total number of nodes in the tree that have an entry
             # * 4 bytes: total number of nodes in the tree that have a copy source
             # * 4 bytes: number of bytes in the data file that are not used anymore
             # * 4 bytes: unused
             # * 20 bytes: SHA-1 hash of ignore patterns
             TREE_METADATA = struct.Struct('>LLLLL4s20s')
             # Must match the `Node` Rust struct in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
             #
             # * 4 bytes: start offset of full path
             # * 2 bytes: length of the full path
             # * 2 bytes: length within the full path before its "base name"
             # * 4 bytes: start offset of the copy source if any, or zero for no copy source
             # * 2 bytes: length of the copy source if any, or unused
             # * 4 bytes: start offset of child nodes
             # * 4 bytes: number of child nodes
             # * 4 bytes: number of descendant nodes that have an entry
             # * 4 bytes: number of descendant nodes that have a "tracked" state
             # * 1 byte: flags
             # * 4 bytes: expected size
             # * 4 bytes: mtime seconds
             # * 4 bytes: mtime nanoseconds
             NODE = struct.Struct('>LHHLHLLLLBlll')
             assert TREE_METADATA_SIZE == TREE_METADATA.size
             assert NODE_SIZE == NODE.size
             def parse_dirstate(map, copy_map, data, tree_metadata):
                 """parse a full v2-dirstate from a binary data into dictionnaries:
                 - map: a {path: entry} mapping that will be filled
                 - copy_map: a {path: copy-source} mapping that will be filled
                 - data: a binary blob contains v2 nodes data
                 - tree_metadata:: a binary blob of the top level node (from the docket)
                 """
                 (
                     root_nodes_start,
                     root_nodes_len,
                     _nodes_with_entry_count,
                     _nodes_with_copy_source_count,
                     _unreachable_bytes,
                     _unused,
                     _ignore_patterns_hash,
                 ) = TREE_METADATA.unpack(tree_metadata)
                 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
             def parse_nodes(map, copy_map, data, start, len):
                 """parse <len> nodes from <data> starting at offset <start>
                 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
                 """
                 for i in range(len):
                     node_start = start + NODE_SIZE * i
                     node_bytes = slice_with_len(data, node_start, NODE_SIZE)
                     (
                         path_start,
                         path_len,
                         _basename_start,
                         copy_source_start,
                         copy_source_len,
                         children_start,
                         children_count,
                         _descendants_with_entry_count,
                         _tracked_descendants_count,
                         flags,
                         size,
                         mtime_s,
                         _mtime_ns,
                     ) = NODE.unpack(node_bytes)
                     # Parse child nodes of this node recursively
                     parse_nodes(map, copy_map, data, children_start, children_count)
                     item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)
                     if not item.any_tracked:
                         continue
                     path = slice_with_len(data, path_start, path_len)
                     map[path] = item
                     if copy_source_start:
                         copy_map[path] = slice_with_len(
                             data, copy_source_start, copy_source_len
                         )
             def slice_with_len(data, start, len):
                 return data[start : start + len]
             @attr.s
             class Node(object):
                 path = attr.ib()
                 entry = attr.ib()
                 parent = attr.ib(default=None)
                 children_count = attr.ib(default=0)
                 children_offset = attr.ib(default=0)
                 descendants_with_entry = attr.ib(default=0)
                 tracked_descendants = attr.ib(default=0)
                 def pack(self, copy_map, paths_offset):
                     path = self.path
                     copy = copy_map.get(path)
                     entry = self.entry
                     path_start = paths_offset
                     path_len = len(path)
                     basename_start = path.rfind(b'/') + 1  # 0 if rfind returns -1
                     if copy is not None:
                         copy_source_start = paths_offset + len(path)
                         copy_source_len = len(copy)
                     else:
                         copy_source_start = 0
                         copy_source_len = 0
                     if entry is not None:
                         flags, size, mtime_s = entry.v2_data()
                         mtime_ns = 0
                     else:
                         # There are no mtime-cached directories in the Python implementation
                         flags = 0
-                        mode = 0
                         size = 0
                         mtime_s = 0
                         mtime_ns = 0
                     return NODE.pack(
                         path_start,
                         path_len,
                         basename_start,
                         copy_source_start,
                         copy_source_len,
                         self.children_offset,
                         self.children_count,
                         self.descendants_with_entry,
                         self.tracked_descendants,
                         flags,
                         size,
                         mtime_s,
                         mtime_ns,
                     )
             def pack_dirstate(map, copy_map, now):
                 """
                 Pack `map` and `copy_map` into the dirstate v2 binary format and return
                 the bytearray.
                 `now` is a timestamp of the current filesystem time used to detect race
                 conditions in writing the dirstate to disk, see inline comment.
                 The on-disk format expects a tree-like structure where the leaves are
                 written first (and sorted per-directory), going up levels until the root
                 node and writing that one to the docket. See more details on the on-disk
                 format in `mercurial/helptext/internals/dirstate-v2`.
                 Since both `map` and `copy_map` are flat dicts we need to figure out the
                 hierarchy. This algorithm does so without having to build the entire tree
                 in-memory: it only keeps the minimum number of nodes around to satisfy the
                 format.
                 # Algorithm explanation
                 This explanation does not talk about the different counters for tracked
                 descendents and storing the copies, but that work is pretty simple once this
                 algorithm is in place.
                 ## Building a subtree
                 First, sort `map`: this makes it so the leaves of the tree are contiguous
                 per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
                 and enables us to use the ordering of folders to have a "cursor" of the
                 current folder we're in without ever going twice in the same branch of the
                 tree. The cursor is a node that remembers its parent and any information
                 relevant to the format (see the `Node` class), building the relevant part
                 of the tree lazily.
                 Then, for each file in `map`, move the cursor into the tree to the
                 corresponding folder of the file: for example, if the very first file
                 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
                 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
                 `Node["a"]`. These nodes are kept around in a stack.
                 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
                 add it to the stack and keep looping with the same logic of creating the
                 tree nodes as needed. If however the next file in `map` is *not* in the same
                 subtree ("a/other", if we're still in the "a/b" folder), then we know that
                 the subtree we're in is complete.
                 ## Writing the subtree
                 We have the entire subtree in the stack, so we start writing it to disk
                 folder by folder. The way we write a folder is to pop the stack into a list
                 until the folder changes, revert this list of direct children (to satisfy
                 the format requirement that children be sorted). This process repeats until
                 we hit the "other" subtree.
                 An example:
                     a
                     dir1/b
                     dir1/c
                     dir2/dir3/d
                     dir2/dir3/e
                     dir2/f
                 Would have us:
                     - add to the stack until "dir2/dir3/e"
                     - realize that "dir2/f" is in a different subtree
                     - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
                       pack them since the next entry is "dir2/dir3"
                     - go back up to "dir2"
                     - add "dir2/f" to the stack
                     - realize we're done with the map
                     - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
                     - go up to the root node, do the same to write "a", "dir1" and "dir2" in
                       that order
                 ## Special case for the root node
                 The root node is not serialized in the format, but its information is
                 written to the docket. Again, see more details on the on-disk format in
                 `mercurial/helptext/internals/dirstate-v2`.
                 """
                 now = int(now)
                 data = bytearray()
                 root_nodes_start = 0
                 root_nodes_len = 0
                 nodes_with_entry_count = 0
                 nodes_with_copy_source_count = 0
                 # Will always be 0 since this implementation always re-writes everything
                 # to disk
                 unreachable_bytes = 0
                 unused = b'\x00' * 4
                 # This is an optimization that's only useful for the Rust implementation
                 ignore_patterns_hash = b'\x00' * 20
                 if len(map) == 0:
                     tree_metadata = TREE_METADATA.pack(
                         root_nodes_start,
                         root_nodes_len,
                         nodes_with_entry_count,
                         nodes_with_copy_source_count,
                         unreachable_bytes,
                         unused,
                         ignore_patterns_hash,
                     )
                     return data, tree_metadata
                 sorted_map = sorted(map.items(), key=lambda x: x[0])
                 # Use a stack to not have to only remember the nodes we currently need
                 # instead of building the entire tree in memory
                 stack = []
                 current_node = Node(b"", None)
                 stack.append(current_node)
                 for index, (path, entry) in enumerate(sorted_map, 1):
                     if entry.need_delay(now):
                         # The file was last modified "simultaneously" with the current
                         # write to dirstate (i.e. within the same second for file-
                         # systems with a granularity of 1 sec). This commonly happens
                         # for at least a couple of files on 'update'.
                         # The user could change the file without changing its size
                         # within the same second. Invalidate the file's mtime in
                         # dirstate, forcing future 'status' calls to compare the
                         # contents of the file if the size is the same. This prevents
                         # mistakenly treating such files as clean.
                         entry.set_possibly_dirty()
                     nodes_with_entry_count += 1
                     if path in copy_map:
                         nodes_with_copy_source_count += 1
                     current_folder = get_folder(path)
                     current_node = move_to_correct_node_in_tree(
                         current_folder, current_node, stack
                     )
                     current_node.children_count += 1
                     # Entries from `map` are never `None`
                     if entry.tracked:
                         current_node.tracked_descendants += 1
                     current_node.descendants_with_entry += 1
                     stack.append(Node(path, entry, current_node))
                     should_pack = True
                     next_path = None
                     if index < len(sorted_map):
                         # Determine if the next entry is in the same sub-tree, if so don't
                         # pack yet
                         next_path = sorted_map[index][0]
                         should_pack = not get_folder(next_path).startswith(current_folder)
                     if should_pack:
                         pack_directory_children(current_node, copy_map, data, stack)
                         while stack and current_node.path != b"":
                             # Go up the tree and write until we reach the folder of the next
                             # entry (if any, otherwise the root)
                             parent = current_node.parent
                             in_parent_folder_of_next_entry = next_path is not None and (
                                 get_folder(next_path).startswith(get_folder(stack[-1].path))
                             )
                             if parent is None or in_parent_folder_of_next_entry:
                                 break
                             pack_directory_children(parent, copy_map, data, stack)
                             current_node = parent
                 # Special case for the root node since we don't write it to disk, only its
                 # children to the docket
                 current_node = stack.pop()
                 assert current_node.path == b"", current_node.path
                 assert len(stack) == 0, len(stack)
                 tree_metadata = TREE_METADATA.pack(
                     current_node.children_offset,
                     current_node.children_count,
                     nodes_with_entry_count,
                     nodes_with_copy_source_count,
                     unreachable_bytes,
                     unused,
                     ignore_patterns_hash,
                 )
                 return data, tree_metadata
             def get_folder(path):
                 """
                 Return the folder of the path that's given, an empty string for root paths.
                 """
                 return path.rsplit(b'/', 1)[0] if b'/' in path else b''
             def move_to_correct_node_in_tree(target_folder, current_node, stack):
                 """
                 Move inside the dirstate node tree to the node corresponding to
                 `target_folder`, creating the missing nodes along the way if needed.
                 """
                 while target_folder != current_node.path:
                     if target_folder.startswith(current_node.path):
                         # We need to go down a folder
                         prefix = target_folder[len(current_node.path) :].lstrip(b'/')
                         subfolder_name = prefix.split(b'/', 1)[0]
                         if current_node.path:
                             subfolder_path = current_node.path + b'/' + subfolder_name
                         else:
                             subfolder_path = subfolder_name
                         next_node = stack[-1]
                         if next_node.path == target_folder:
                             # This folder is now a file and only contains removed entries
                             # merge with the last node
                             current_node = next_node
                         else:
                             current_node.children_count += 1
                             current_node = Node(subfolder_path, None, current_node)
                             stack.append(current_node)
                     else:
                         # We need to go up a folder
                         current_node = current_node.parent
                 return current_node
             def pack_directory_children(node, copy_map, data, stack):
                 """
                 Write the binary representation of the direct sorted children of `node` to
                 `data`
                 """
                 direct_children = []
                 while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
                     direct_children.append(stack.pop())
                 if not direct_children:
                     raise error.ProgrammingError(b"no direct children for %r" % node.path)
                 # Reverse the stack to get the correct sorted order
                 direct_children.reverse()
                 packed_children = bytearray()
                 # Write the paths to `data`. Pack child nodes but don't write them yet
                 for child in direct_children:
                     packed = child.pack(copy_map=copy_map, paths_offset=len(data))
                     packed_children.extend(packed)
                     data.extend(child.path)
                     data.extend(copy_map.get(child.path, b""))
                     node.tracked_descendants += child.tracked_descendants
                     node.descendants_with_entry += child.descendants_with_entry
                 # Write the fixed-size child nodes all together
                 node.children_offset = len(data)
                 data.extend(packed_children)