dirstate: Remove unused variable...
Simon Sapin
r49042:50dca3aa default
@@ -1,411 +1,410 @@
1 1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 2 #
3 3 # Copyright Mercurial Contributors
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from ..thirdparty import attr
13 13 from .. import error, policy
14 14
15 15 parsers = policy.importmod('parsers')
16 16
17 17
18 18 # Must match the constant of the same name in
19 19 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
20 20 TREE_METADATA_SIZE = 44
21 21 NODE_SIZE = 43
22 22
23 23
24 24 # Must match the `TreeMetadata` Rust struct in
25 25 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
26 26 #
27 27 # * 4 bytes: start offset of root nodes
28 28 # * 4 bytes: number of root nodes
29 29 # * 4 bytes: total number of nodes in the tree that have an entry
30 30 # * 4 bytes: total number of nodes in the tree that have a copy source
31 31 # * 4 bytes: number of bytes in the data file that are not used anymore
32 32 # * 4 bytes: unused
33 33 # * 20 bytes: SHA-1 hash of ignore patterns
34 34 TREE_METADATA = struct.Struct('>LLLLL4s20s')
35 35
36 36
37 37 # Must match the `Node` Rust struct in
38 38 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
39 39 #
40 40 # * 4 bytes: start offset of full path
41 41 # * 2 bytes: length of the full path
42 42 # * 2 bytes: length within the full path before its "base name"
43 43 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
44 44 # * 2 bytes: length of the copy source if any, or unused
45 45 # * 4 bytes: start offset of child nodes
46 46 # * 4 bytes: number of child nodes
47 47 # * 4 bytes: number of descendant nodes that have an entry
48 48 # * 4 bytes: number of descendant nodes that have a "tracked" state
49 49 # * 1 byte: flags
50 50 # * 4 bytes: expected size
51 51 # * 4 bytes: mtime seconds
52 52 # * 4 bytes: mtime nanoseconds
53 53 NODE = struct.Struct('>LHHLHLLLLBlll')
54 54
55 55
56 56 assert TREE_METADATA_SIZE == TREE_METADATA.size
57 57 assert NODE_SIZE == NODE.size
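A standalone sketch of how the two layouts described above map onto the asserted sizes; the struct definitions simply mirror the ones in this file, and the zero-filled buffer is made up for illustration:

import struct

TREE_METADATA = struct.Struct('>LLLLL4s20s')  # 5 * 4 + 4 + 20 = 44 bytes
NODE = struct.Struct('>LHHLHLLLLBlll')        # 43 bytes ('>' means big-endian, no padding)
assert TREE_METADATA.size == 44 and NODE.size == 43

# Unpacking an all-zero buffer yields "empty" tree metadata: no root nodes,
# no entries, no copy sources, no unreachable bytes, zeroed ignore-pattern hash.
print(TREE_METADATA.unpack(b'\x00' * TREE_METADATA.size))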
58 58
59 59
60 60 def parse_dirstate(map, copy_map, data, tree_metadata):
61 61 """parse a full v2-dirstate from a binary data into dictionnaries:
62 62
63 63 - map: a {path: entry} mapping that will be filled
64 64 - copy_map: a {path: copy-source} mapping that will be filled
65 65 - data: a binary blob containing v2 node data
66 66 - tree_metadata: a binary blob of the top-level node (from the docket)
67 67 """
68 68 (
69 69 root_nodes_start,
70 70 root_nodes_len,
71 71 _nodes_with_entry_count,
72 72 _nodes_with_copy_source_count,
73 73 _unreachable_bytes,
74 74 _unused,
75 75 _ignore_patterns_hash,
76 76 ) = TREE_METADATA.unpack(tree_metadata)
77 77 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
78 78
79 79
80 80 def parse_nodes(map, copy_map, data, start, len):
81 81 """parse <len> nodes from <data> starting at offset <start>
82 82
83 83 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
84 84 """
85 85 for i in range(len):
86 86 node_start = start + NODE_SIZE * i
87 87 node_bytes = slice_with_len(data, node_start, NODE_SIZE)
88 88 (
89 89 path_start,
90 90 path_len,
91 91 _basename_start,
92 92 copy_source_start,
93 93 copy_source_len,
94 94 children_start,
95 95 children_count,
96 96 _descendants_with_entry_count,
97 97 _tracked_descendants_count,
98 98 flags,
99 99 size,
100 100 mtime_s,
101 101 _mtime_ns,
102 102 ) = NODE.unpack(node_bytes)
103 103
104 104 # Parse child nodes of this node recursively
105 105 parse_nodes(map, copy_map, data, children_start, children_count)
106 106
107 107 item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)
108 108 if not item.any_tracked:
109 109 continue
110 110 path = slice_with_len(data, path_start, path_len)
111 111 map[path] = item
112 112 if copy_source_start:
113 113 copy_map[path] = slice_with_len(
114 114 data, copy_source_start, copy_source_len
115 115 )
116 116
117 117
118 118 def slice_with_len(data, start, len):
119 119 return data[start : start + len]
120 120
121 121
122 122 @attr.s
123 123 class Node(object):
124 124 path = attr.ib()
125 125 entry = attr.ib()
126 126 parent = attr.ib(default=None)
127 127 children_count = attr.ib(default=0)
128 128 children_offset = attr.ib(default=0)
129 129 descendants_with_entry = attr.ib(default=0)
130 130 tracked_descendants = attr.ib(default=0)
131 131
132 132 def pack(self, copy_map, paths_offset):
133 133 path = self.path
134 134 copy = copy_map.get(path)
135 135 entry = self.entry
136 136
137 137 path_start = paths_offset
138 138 path_len = len(path)
139 139 basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1
140 140 if copy is not None:
141 141 copy_source_start = paths_offset + len(path)
142 142 copy_source_len = len(copy)
143 143 else:
144 144 copy_source_start = 0
145 145 copy_source_len = 0
146 146 if entry is not None:
147 147 flags, size, mtime_s = entry.v2_data()
148 148 mtime_ns = 0
149 149 else:
150 150 # There are no mtime-cached directories in the Python implementation
151 151 flags = 0
152 mode = 0
153 152 size = 0
154 153 mtime_s = 0
155 154 mtime_ns = 0
156 155 return NODE.pack(
157 156 path_start,
158 157 path_len,
159 158 basename_start,
160 159 copy_source_start,
161 160 copy_source_len,
162 161 self.children_offset,
163 162 self.children_count,
164 163 self.descendants_with_entry,
165 164 self.tracked_descendants,
166 165 flags,
167 166 size,
168 167 mtime_s,
169 168 mtime_ns,
170 169 )
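A hypothetical usage sketch for this class, not part of the commit; the import path mercurial.dirstateutils.v2 is an assumption about where this file lives:

from mercurial.dirstateutils import v2  # assumed module location

node = v2.Node(b"dir1", None)                    # a directory node with no entry
packed = node.pack(copy_map={}, paths_offset=0)  # entry is None, so flags/size/mtime are 0
assert len(packed) == v2.NODE_SIZE               # every node record is exactly 43 bytes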
171 170
172 171
173 172 def pack_dirstate(map, copy_map, now):
174 173 """
175 174 Pack `map` and `copy_map` into the dirstate v2 binary format and return
176 175 the bytearray.
177 176 `now` is a timestamp of the current filesystem time used to detect race
178 177 conditions when writing the dirstate to disk; see the inline comment.
179 178
180 179 The on-disk format expects a tree-like structure where the leaves are
181 180 written first (and sorted per-directory), going up levels until the root
182 181 node and writing that one to the docket. See more details on the on-disk
183 182 format in `mercurial/helptext/internals/dirstate-v2`.
184 183
185 184 Since both `map` and `copy_map` are flat dicts we need to figure out the
186 185 hierarchy. This algorithm does so without having to build the entire tree
187 186 in-memory: it only keeps the minimum number of nodes around to satisfy the
188 187 format.
189 188
190 189 # Algorithm explanation
191 190
192 191 This explanation does not cover the different counters for tracked
193 192 descendants or the handling of copy sources, but that work is pretty simple
194 193 once this algorithm is in place.
195 194
196 195 ## Building a subtree
197 196
198 197 First, sort `map`: this makes it so the leaves of the tree are contiguous
199 198 per directory (e.g. a/b/c and a/b/d will be next to each other in the list),
200 199 and enables us to use the ordering of folders to keep a "cursor" of the
201 200 current folder we're in without ever visiting the same branch of the tree
202 201 twice. The cursor is a node that remembers its parent and any information
203 202 relevant to the format (see the `Node` class), building the relevant part
204 203 of the tree lazily.
205 204 Then, for each file in `map`, move the cursor into the tree to the
206 205 corresponding folder of the file: for example, if the very first file
207 206 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
208 207 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
209 208 `Node["a"]`. These nodes are kept around in a stack.
210 209 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
211 210 add it to the stack and keep looping with the same logic of creating the
212 211 tree nodes as needed. If however the next file in `map` is *not* in the same
213 212 subtree ("a/other", if we're still in the "a/b" folder), then we know that
214 213 the subtree we're in is complete.
215 214
216 215 ## Writing the subtree
217 216
218 217 We have the entire subtree in the stack, so we start writing it to disk
219 218 folder by folder. The way we write a folder is to pop the stack into a list
220 219 until the folder changes, then reverse this list of direct children (to satisfy
221 220 the format requirement that children be sorted). This process repeats until
222 221 we hit the "other" subtree.
223 222
224 223 An example:
225 224 a
226 225 dir1/b
227 226 dir1/c
228 227 dir2/dir3/d
229 228 dir2/dir3/e
230 229 dir2/f
231 230
232 231 Would have us:
233 232 - add to the stack until "dir2/dir3/e"
234 233 - realize that "dir2/f" is in a different subtree
235 234 - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
236 235 pack them, since the next item left on the stack is the "dir2/dir3" node itself
237 236 - go back up to "dir2"
238 237 - add "dir2/f" to the stack
239 238 - realize we're done with the map
240 239 - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
241 240 - go up to the root node, do the same to write "a", "dir1" and "dir2" in
242 241 that order
243 242
244 243 ## Special case for the root node
245 244
246 245 The root node is not serialized in the format, but its information is
247 246 written to the docket. Again, see more details on the on-disk format in
248 247 `mercurial/helptext/internals/dirstate-v2`.
249 248 """
250 249 now = int(now)
251 250 data = bytearray()
252 251 root_nodes_start = 0
253 252 root_nodes_len = 0
254 253 nodes_with_entry_count = 0
255 254 nodes_with_copy_source_count = 0
256 255 # Will always be 0 since this implementation always re-writes everything
257 256 # to disk
258 257 unreachable_bytes = 0
259 258 unused = b'\x00' * 4
260 259 # This is an optimization that's only useful for the Rust implementation
261 260 ignore_patterns_hash = b'\x00' * 20
262 261
263 262 if len(map) == 0:
264 263 tree_metadata = TREE_METADATA.pack(
265 264 root_nodes_start,
266 265 root_nodes_len,
267 266 nodes_with_entry_count,
268 267 nodes_with_copy_source_count,
269 268 unreachable_bytes,
270 269 unused,
271 270 ignore_patterns_hash,
272 271 )
273 272 return data, tree_metadata
274 273
275 274 sorted_map = sorted(map.items(), key=lambda x: x[0])
276 275
277 276 # Use a stack so that we only have to remember the nodes we currently need,
278 277 # instead of building the entire tree in memory
279 278 stack = []
280 279 current_node = Node(b"", None)
281 280 stack.append(current_node)
282 281
283 282 for index, (path, entry) in enumerate(sorted_map, 1):
284 283 if entry.need_delay(now):
285 284 # The file was last modified "simultaneously" with the current
286 285 # write to dirstate (i.e. within the same second for file-
287 286 # systems with a granularity of 1 sec). This commonly happens
288 287 # for at least a couple of files on 'update'.
289 288 # The user could change the file without changing its size
290 289 # within the same second. Invalidate the file's mtime in
291 290 # dirstate, forcing future 'status' calls to compare the
292 291 # contents of the file if the size is the same. This prevents
293 292 # mistakenly treating such files as clean.
294 293 entry.set_possibly_dirty()
295 294 nodes_with_entry_count += 1
296 295 if path in copy_map:
297 296 nodes_with_copy_source_count += 1
298 297 current_folder = get_folder(path)
299 298 current_node = move_to_correct_node_in_tree(
300 299 current_folder, current_node, stack
301 300 )
302 301
303 302 current_node.children_count += 1
304 303 # Entries from `map` are never `None`
305 304 if entry.tracked:
306 305 current_node.tracked_descendants += 1
307 306 current_node.descendants_with_entry += 1
308 307 stack.append(Node(path, entry, current_node))
309 308
310 309 should_pack = True
311 310 next_path = None
312 311 if index < len(sorted_map):
313 312 # Determine if the next entry is in the same sub-tree; if so, don't
314 313 # pack yet
315 314 next_path = sorted_map[index][0]
316 315 should_pack = not get_folder(next_path).startswith(current_folder)
317 316 if should_pack:
318 317 pack_directory_children(current_node, copy_map, data, stack)
319 318 while stack and current_node.path != b"":
320 319 # Go up the tree and write until we reach the folder of the next
321 320 # entry (if any, otherwise the root)
322 321 parent = current_node.parent
323 322 in_parent_folder_of_next_entry = next_path is not None and (
324 323 get_folder(next_path).startswith(get_folder(stack[-1].path))
325 324 )
326 325 if parent is None or in_parent_folder_of_next_entry:
327 326 break
328 327 pack_directory_children(parent, copy_map, data, stack)
329 328 current_node = parent
330 329
331 330 # Special case for the root node: it is not written to disk; its
332 331 # information (children offset and count) goes into the docket instead
333 332 current_node = stack.pop()
334 333 assert current_node.path == b"", current_node.path
335 334 assert len(stack) == 0, len(stack)
336 335
337 336 tree_metadata = TREE_METADATA.pack(
338 337 current_node.children_offset,
339 338 current_node.children_count,
340 339 nodes_with_entry_count,
341 340 nodes_with_copy_source_count,
342 341 unreachable_bytes,
343 342 unused,
344 343 ignore_patterns_hash,
345 344 )
346 345
347 346 return data, tree_metadata
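A hedged round-trip sketch for pack_dirstate and parse_dirstate using the empty-map fast path; the import path and the use of time.time() for `now` are assumptions:

import time
from mercurial.dirstateutils import v2  # assumed module location

data, tree_metadata = v2.pack_dirstate({}, {}, time.time())
assert len(data) == 0 and len(tree_metadata) == v2.TREE_METADATA_SIZE

restored_map, restored_copy_map = {}, {}
v2.parse_dirstate(restored_map, restored_copy_map, bytes(data), tree_metadata)
assert restored_map == {} and restored_copy_map == {}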
348 347
349 348
350 349 def get_folder(path):
351 350 """
352 351 Return the folder of the given path, or an empty string for root-level paths.
353 352 """
354 353 return path.rsplit(b'/', 1)[0] if b'/' in path else b''
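Illustrative calls of the function above (these examples are not in the original file):

get_folder(b'a/b/c')   # -> b'a/b'
get_folder(b'a')       # -> b''  (a root-level path has no folder)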
355 354
356 355
357 356 def move_to_correct_node_in_tree(target_folder, current_node, stack):
358 357 """
359 358 Move inside the dirstate node tree to the node corresponding to
360 359 `target_folder`, creating the missing nodes along the way if needed.
361 360 """
362 361 while target_folder != current_node.path:
363 362 if target_folder.startswith(current_node.path):
364 363 # We need to go down a folder
365 364 prefix = target_folder[len(current_node.path) :].lstrip(b'/')
366 365 subfolder_name = prefix.split(b'/', 1)[0]
367 366 if current_node.path:
368 367 subfolder_path = current_node.path + b'/' + subfolder_name
369 368 else:
370 369 subfolder_path = subfolder_name
371 370 next_node = stack[-1]
372 371 if next_node.path == target_folder:
373 372 # This folder is now a file and only contains removed entries;
374 373 # merge with the last node
375 374 current_node = next_node
376 375 else:
377 376 current_node.children_count += 1
378 377 current_node = Node(subfolder_path, None, current_node)
379 378 stack.append(current_node)
380 379 else:
381 380 # We need to go up a folder
382 381 current_node = current_node.parent
383 382 return current_node
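A hypothetical standalone trace of this traversal (assumed import path; in the real flow the stack is built incrementally by pack_dirstate): with the cursor at Node[b"a/b"], moving to folder b"a/other" first walks up to Node[b"a"], then creates and descends into a new Node[b"a/other"]:

from mercurial.dirstateutils import v2  # assumed module location

root = v2.Node(b"", None)
a = v2.Node(b"a", None, root)
ab = v2.Node(b"a/b", None, a)
stack = [root, a, ab]

cursor = v2.move_to_correct_node_in_tree(b"a/other", ab, stack)
assert cursor.path == b"a/other" and cursor.parent is a
assert a.children_count == 1     # the new subfolder was counted on its parent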
384 383
385 384
386 385 def pack_directory_children(node, copy_map, data, stack):
387 386 """
388 387 Write the binary representation of the direct sorted children of `node` to
389 388 `data`
390 389 """
391 390 direct_children = []
392 391
393 392 while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
394 393 direct_children.append(stack.pop())
395 394 if not direct_children:
396 395 raise error.ProgrammingError(b"no direct children for %r" % node.path)
397 396
398 397 # Reverse the popped children to get the correct sorted order
399 398 direct_children.reverse()
400 399 packed_children = bytearray()
401 400 # Write the paths to `data`. Pack child nodes but don't write them yet
402 401 for child in direct_children:
403 402 packed = child.pack(copy_map=copy_map, paths_offset=len(data))
404 403 packed_children.extend(packed)
405 404 data.extend(child.path)
406 405 data.extend(copy_map.get(child.path, b""))
407 406 node.tracked_descendants += child.tracked_descendants
408 407 node.descendants_with_entry += child.descendants_with_entry
409 408 # Write the fixed-size child nodes all together
410 409 node.children_offset = len(data)
411 410 data.extend(packed_children)
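Finally, a hedged sketch of the byte layout this function produces (assumed import path; the stack is hand-built here, whereas pack_dirstate builds it incrementally):

from mercurial.dirstateutils import v2  # assumed module location

data = bytearray()
root = v2.Node(b"", None)
d = v2.Node(b"dir1", None, root)
stack = [root, d, v2.Node(b"dir1/b", None, d), v2.Node(b"dir1/c", None, d)]

v2.pack_directory_children(d, {}, data, stack)
# Variable-length paths come first, then the fixed-size 43-byte node records.
assert bytes(data[:12]) == b"dir1/bdir1/c"
assert d.children_offset == 12 and len(data) == 12 + 2 * v2.NODE_SIZE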