##// END OF EJS Templates
dirstate-v2: fix edge case where entries aren't sorted...
Raphaël Gomès -
r50444:fc719967 stable
parent child Browse files
Show More
@@ -1,421 +1,421 b''
1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 #
2 #
3 # Copyright Mercurial Contributors
3 # Copyright Mercurial Contributors
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import struct
9 import struct
10
10
11 from ..thirdparty import attr
11 from ..thirdparty import attr
12 from .. import error, policy
12 from .. import error, policy
13
13
14 parsers = policy.importmod('parsers')
14 parsers = policy.importmod('parsers')
15
15
16
16
17 # Must match the constant of the same name in
17 # Must match the constant of the same name in
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 TREE_METADATA_SIZE = 44
19 TREE_METADATA_SIZE = 44
20 NODE_SIZE = 44
20 NODE_SIZE = 44
21
21
22
22
23 # Must match the `TreeMetadata` Rust struct in
23 # Must match the `TreeMetadata` Rust struct in
24 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
24 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
25 #
25 #
26 # * 4 bytes: start offset of root nodes
26 # * 4 bytes: start offset of root nodes
27 # * 4 bytes: number of root nodes
27 # * 4 bytes: number of root nodes
28 # * 4 bytes: total number of nodes in the tree that have an entry
28 # * 4 bytes: total number of nodes in the tree that have an entry
29 # * 4 bytes: total number of nodes in the tree that have a copy source
29 # * 4 bytes: total number of nodes in the tree that have a copy source
30 # * 4 bytes: number of bytes in the data file that are not used anymore
30 # * 4 bytes: number of bytes in the data file that are not used anymore
31 # * 4 bytes: unused
31 # * 4 bytes: unused
32 # * 20 bytes: SHA-1 hash of ignore patterns
32 # * 20 bytes: SHA-1 hash of ignore patterns
33 TREE_METADATA = struct.Struct('>LLLLL4s20s')
33 TREE_METADATA = struct.Struct('>LLLLL4s20s')
34
34
35
35
36 # Must match the `Node` Rust struct in
36 # Must match the `Node` Rust struct in
37 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
37 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
38 #
38 #
39 # * 4 bytes: start offset of full path
39 # * 4 bytes: start offset of full path
40 # * 2 bytes: length of the full path
40 # * 2 bytes: length of the full path
41 # * 2 bytes: length within the full path before its "base name"
41 # * 2 bytes: length within the full path before its "base name"
42 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
42 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
43 # * 2 bytes: length of the copy source if any, or unused
43 # * 2 bytes: length of the copy source if any, or unused
44 # * 4 bytes: start offset of child nodes
44 # * 4 bytes: start offset of child nodes
45 # * 4 bytes: number of child nodes
45 # * 4 bytes: number of child nodes
46 # * 4 bytes: number of descendant nodes that have an entry
46 # * 4 bytes: number of descendant nodes that have an entry
47 # * 4 bytes: number of descendant nodes that have a "tracked" state
47 # * 4 bytes: number of descendant nodes that have a "tracked" state
48 # * 1 byte: flags
48 # * 1 byte: flags
49 # * 4 bytes: expected size
49 # * 4 bytes: expected size
50 # * 4 bytes: mtime seconds
50 # * 4 bytes: mtime seconds
51 # * 4 bytes: mtime nanoseconds
51 # * 4 bytes: mtime nanoseconds
52 NODE = struct.Struct('>LHHLHLLLLHlll')
52 NODE = struct.Struct('>LHHLHLLLLHlll')
53
53
54
54
55 assert TREE_METADATA_SIZE == TREE_METADATA.size
55 assert TREE_METADATA_SIZE == TREE_METADATA.size
56 assert NODE_SIZE == NODE.size
56 assert NODE_SIZE == NODE.size
57
57
58 # match constant in mercurial/pure/parsers.py
58 # match constant in mercurial/pure/parsers.py
59 DIRSTATE_V2_DIRECTORY = 1 << 13
59 DIRSTATE_V2_DIRECTORY = 1 << 13
60
60
61
61
def parse_dirstate(map, copy_map, data, tree_metadata):
    """Parse a complete v2 dirstate from its binary representation.

    Fills the two dictionaries in place:

    - map: {path: entry} for every tracked file
    - copy_map: {path: copy-source} for every copied file
    - data: binary blob containing the serialized v2 nodes
    - tree_metadata: binary blob describing the root node (from the docket)
    """
    fields = TREE_METADATA.unpack(tree_metadata)
    # Only the first two fields matter for parsing: the start offset of the
    # root nodes and how many of them there are.  The remaining counters,
    # the unused padding and the ignore-patterns hash are not needed here.
    root_nodes_start = fields[0]
    root_nodes_len = fields[1]
    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
80
80
81
81
def parse_nodes(map, copy_map, data, start, len):
    """parse <len> nodes from <data> starting at offset <start>

    This is used by parse_dirstate to recursively fill `map` and `copy_map`.

    All directory specific information is ignored and do not need any
    processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
    """
    # NOTE: the `len` parameter shadows the builtin; the name is kept for
    # interface compatibility with existing callers.
    for index in range(len):
        offset = start + NODE_SIZE * index
        fields = NODE.unpack(slice_with_len(data, offset, NODE_SIZE))
        # Field layout matches the `Node` Rust struct (see NODE above).
        path_start = fields[0]
        path_len = fields[1]
        # fields[2] is the basename start, unused when parsing
        copy_source_start = fields[3]
        copy_source_len = fields[4]
        children_start = fields[5]
        children_count = fields[6]
        # fields[7] and fields[8] are descendant counters, unused here
        flags, size, mtime_s, mtime_ns = fields[9:13]

        # Depth-first: descend into this node's children before deciding
        # whether the node itself carries an entry worth recording.
        parse_nodes(map, copy_map, data, children_start, children_count)

        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
        if not item.any_tracked:
            # Pure directory node (or all-removed): nothing to record.
            continue
        path = slice_with_len(data, path_start, path_len)
        map[path] = item
        if copy_source_start:
            copy_map[path] = slice_with_len(
                data, copy_source_start, copy_source_len
            )
121
121
122
122
def slice_with_len(data, start, len):
    """Return the sub-sequence of `data` of length `len` starting at
    `start` (shorter if `data` ends before that)."""
    # NOTE: `len` shadows the builtin; name kept for interface compatibility.
    end = start + len
    return data[start:end]
125
125
126
126
@attr.s
class Node:
    # A node of the in-memory dirstate tree being serialized: either a file
    # (entry is a DirstateItem) or a directory (entry is None).
    path = attr.ib()
    entry = attr.ib()
    parent = attr.ib(default=None)
    children_count = attr.ib(default=0)
    children_offset = attr.ib(default=0)
    descendants_with_entry = attr.ib(default=0)
    tracked_descendants = attr.ib(default=0)

    def pack(self, copy_map, paths_offset):
        """Return the fixed-size binary representation of this node.

        `paths_offset` is the offset inside the data file where this node's
        full path — immediately followed by its copy source, if any — is
        being written by the caller.
        """
        node_path = self.path
        copy_source = copy_map.get(node_path)

        # 0 when there is no slash: the whole path is the base name.
        basename_start = node_path.rfind(b'/') + 1

        if copy_source is None:
            copy_source_start = 0
            copy_source_len = 0
        else:
            # The copy source is laid out right after the path itself.
            copy_source_start = paths_offset + len(node_path)
            copy_source_len = len(copy_source)

        if self.entry is None:
            # There are no mtime-cached directories in the Python
            # implementation, so a directory node carries only the flag.
            flags = DIRSTATE_V2_DIRECTORY
            size = 0
            mtime_s = 0
            mtime_ns = 0
        else:
            flags, size, mtime_s, mtime_ns = self.entry.v2_data()

        return NODE.pack(
            paths_offset,
            len(node_path),
            basename_start,
            copy_source_start,
            copy_source_len,
            self.children_offset,
            self.children_count,
            self.descendants_with_entry,
            self.tracked_descendants,
            flags,
            size,
            mtime_s,
            mtime_ns,
        )
174
174
175
175
def pack_dirstate(map, copy_map):
    """
    Pack `map` and `copy_map` into the dirstate v2 binary format and return
    the tuple of (data, metadata) bytearrays.

    The on-disk format expects a tree-like structure where the leaves are
    written first (and sorted per-directory), going up levels until the root
    node and writing that one to the docket. See more details on the on-disk
    format in `mercurial/helptext/internals/dirstate-v2`.

    Since both `map` and `copy_map` are flat dicts we need to figure out the
    hierarchy. This algorithm does so without having to build the entire tree
    in-memory: it only keeps the minimum number of nodes around to satisfy the
    format.

    # Algorithm explanation

    This explanation does not talk about the different counters for tracked
    descendants and storing the copies, but that work is pretty simple once this
    algorithm is in place.

    ## Building a subtree

    First, sort `map`: this makes it so the leaves of the tree are contiguous
    per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
    and enables us to use the ordering of folders to have a "cursor" of the
    current folder we're in without ever going twice in the same branch of the
    tree. The cursor is a node that remembers its parent and any information
    relevant to the format (see the `Node` class), building the relevant part
    of the tree lazily.
    Then, for each file in `map`, move the cursor into the tree to the
    corresponding folder of the file: for example, if the very first file
    is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
    its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
    `Node["a"]`. These nodes are kept around in a stack.
    If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
    add it to the stack and keep looping with the same logic of creating the
    tree nodes as needed. If however the next file in `map` is *not* in the same
    subtree ("a/other", if we're still in the "a/b" folder), then we know that
    the subtree we're in is complete.

    ## Writing the subtree

    We have the entire subtree in the stack, so we start writing it to disk
    folder by folder. The way we write a folder is to pop the stack into a list
    until the folder changes, reverse this list of direct children (to satisfy
    the format requirement that children be sorted). This process repeats until
    we hit the "other" subtree.

    An example:
        a
        dir1/b
        dir1/c
        dir2/dir3/d
        dir2/dir3/e
        dir2/f

    Would have us:
        - add to the stack until "dir2/dir3/e"
        - realize that "dir2/f" is in a different subtree
        - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
          pack them since the next entry is "dir2/dir3"
        - go back up to "dir2"
        - add "dir2/f" to the stack
        - realize we're done with the map
        - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
        - go up to the root node, do the same to write "a", "dir1" and "dir2" in
          that order

    ## Special case for the root node

    The root node is not serialized in the format, but its information is
    written to the docket. Again, see more details on the on-disk format in
    `mercurial/helptext/internals/dirstate-v2`.
    """
    data = bytearray()
    root_nodes_start = 0
    root_nodes_len = 0
    nodes_with_entry_count = 0
    nodes_with_copy_source_count = 0
    # Will always be 0 since this implementation always re-writes everything
    # to disk
    unreachable_bytes = 0
    unused = b'\x00' * 4
    # This is an optimization that's only useful for the Rust implementation
    ignore_patterns_hash = b'\x00' * 20

    if len(map) == 0:
        tree_metadata = TREE_METADATA.pack(
            root_nodes_start,
            root_nodes_len,
            nodes_with_entry_count,
            nodes_with_copy_source_count,
            unreachable_bytes,
            unused,
            ignore_patterns_hash,
        )
        return data, tree_metadata

    # Sort by path *components*, not by the raw byte string: a plain bytewise
    # sort would order e.g. b"dir-name" before b"dir/name" (b'-' < b'/'),
    # splitting the entries of a directory apart and breaking the contiguity
    # invariant the subtree-building algorithm above relies on.
    sorted_map = sorted(map.items(), key=lambda x: x[0].split(b"/"))

    # Use a stack to have to only remember the nodes we currently need
    # instead of building the entire tree in memory
    stack = []
    current_node = Node(b"", None)
    stack.append(current_node)

    for index, (path, entry) in enumerate(sorted_map, 1):
        nodes_with_entry_count += 1
        if path in copy_map:
            nodes_with_copy_source_count += 1
        current_folder = get_folder(path)
        current_node = move_to_correct_node_in_tree(
            current_folder, current_node, stack
        )

        current_node.children_count += 1
        # Entries from `map` are never `None`
        if entry.tracked:
            current_node.tracked_descendants += 1
        current_node.descendants_with_entry += 1
        stack.append(Node(path, entry, current_node))

        should_pack = True
        next_path = None
        if index < len(sorted_map):
            # Determine if the next entry is in the same sub-tree, if so don't
            # pack yet
            next_path = sorted_map[index][0]
            should_pack = not is_ancestor(next_path, current_folder)
        if should_pack:
            pack_directory_children(current_node, copy_map, data, stack)
            while stack and current_node.path != b"":
                # Go up the tree and write until we reach the folder of the next
                # entry (if any, otherwise the root)
                parent = current_node.parent
                in_ancestor_of_next_path = next_path is not None and (
                    is_ancestor(next_path, get_folder(stack[-1].path))
                )
                if parent is None or in_ancestor_of_next_path:
                    break
                pack_directory_children(parent, copy_map, data, stack)
                current_node = parent

    # Special case for the root node since we don't write it to disk, only its
    # children to the docket
    current_node = stack.pop()
    assert current_node.path == b"", current_node.path
    assert len(stack) == 0, len(stack)

    tree_metadata = TREE_METADATA.pack(
        current_node.children_offset,
        current_node.children_count,
        nodes_with_entry_count,
        nodes_with_copy_source_count,
        unreachable_bytes,
        unused,
        ignore_patterns_hash,
    )

    return data, tree_metadata
337
337
338
338
def get_folder(path):
    """Return the directory part of `path` (b'' for paths at the root)."""
    slash = path.rfind(b'/')
    return path[:slash] if slash >= 0 else b''
344
344
345
345
def is_ancestor(path, maybe_ancestor):
    """Returns whether `maybe_ancestor` is an ancestor of `path`.

    >>> is_ancestor(b"a", b"")
    True
    >>> is_ancestor(b"a/b/c", b"a/b/c")
    False
    >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext")
    False
    >>> is_ancestor(b"hgext3rd/__init__.py", b"hgext3rd")
    True
    """
    # The root directory is an ancestor of everything.
    if not maybe_ancestor:
        return True
    # A real ancestor is a strict prefix of `path`, so it always sorts
    # strictly before it; anything else can be rejected outright.
    if maybe_ancestor >= path:
        return False
    # Compare component by component; zip truncates to the shorter list.
    pairs = zip(path.split(b"/"), maybe_ancestor.split(b"/"))
    return all(ours == theirs for ours, theirs in pairs)
365
365
366
366
def move_to_correct_node_in_tree(target_folder, current_node, stack):
    """
    Move inside the dirstate node tree to the node corresponding to
    `target_folder`, creating the missing nodes along the way if needed.

    Returns the node for `target_folder`; newly created intermediate
    directory nodes are pushed onto `stack` as a side effect.
    """
    # Walk up or down one level per iteration until the cursor sits on the
    # target folder itself.
    while target_folder != current_node.path:
        if is_ancestor(target_folder, current_node.path):
            # We need to go down a folder
            # First path component of `target_folder` below the current node.
            prefix = target_folder[len(current_node.path) :].lstrip(b'/')
            subfolder_name = prefix.split(b'/', 1)[0]
            if current_node.path:
                subfolder_path = current_node.path + b'/' + subfolder_name
            else:
                # Current node is the root: no separator needed.
                subfolder_path = subfolder_name
            next_node = stack[-1]
            if next_node.path == target_folder:
                # This folder is now a file and only contains removed entries
                # merge with the last node
                current_node = next_node
            else:
                # Create the intermediate directory node lazily and keep it
                # on the stack so it gets packed with its siblings later.
                current_node.children_count += 1
                current_node = Node(subfolder_path, None, current_node)
                stack.append(current_node)
        else:
            # We need to go up a folder
            current_node = current_node.parent
    return current_node
394
394
395
395
def pack_directory_children(node, copy_map, data, stack):
    """
    Write the binary representation of the direct sorted children of `node` to
    `data`
    """
    children = []

    # Pop every stack entry whose parent folder is `node`; the root sentinel
    # (empty path) is never popped.
    while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
        children.append(stack.pop())
    if not children:
        raise error.ProgrammingError(b"no direct children for %r" % node.path)

    # The stack yielded the children in reverse sorted order; restore the
    # sorted order the on-disk format requires.
    children.reverse()

    packed_children = bytearray()
    for child in children:
        # Write each child's variable-length data (its path, immediately
        # followed by its copy source) right away, but buffer the fixed-size
        # node records so they end up laid out contiguously afterwards.
        packed_children += child.pack(copy_map=copy_map, paths_offset=len(data))
        data += child.path
        data += copy_map.get(child.path, b"")
        # Propagate the descendant counters up to the parent.
        node.tracked_descendants += child.tracked_descendants
        node.descendants_with_entry += child.descendants_with_entry
    # Now write the fixed-size child nodes all together.
    node.children_offset = len(data)
    data += packed_children
@@ -1,271 +1,261 b''
1 #testcases dirstate-v1 dirstate-v2
1 #testcases dirstate-v1 dirstate-v2
2
2
3 #if dirstate-v2
3 #if dirstate-v2
4 $ cat >> $HGRCPATH << EOF
4 $ cat >> $HGRCPATH << EOF
5 > [format]
5 > [format]
6 > use-dirstate-v2=1
6 > use-dirstate-v2=1
7 > [storage]
7 > [storage]
8 > dirstate-v2.slow-path=allow
8 > dirstate-v2.slow-path=allow
9 > EOF
9 > EOF
10 #endif
10 #endif
11
11
12 ------ Test dirstate._dirs refcounting
12 ------ Test dirstate._dirs refcounting
13
13
14 $ hg init t
14 $ hg init t
15 $ cd t
15 $ cd t
16 $ mkdir -p a/b/c/d
16 $ mkdir -p a/b/c/d
17 $ touch a/b/c/d/x
17 $ touch a/b/c/d/x
18 $ touch a/b/c/d/y
18 $ touch a/b/c/d/y
19 $ touch a/b/c/d/z
19 $ touch a/b/c/d/z
20 $ hg ci -Am m
20 $ hg ci -Am m
21 adding a/b/c/d/x
21 adding a/b/c/d/x
22 adding a/b/c/d/y
22 adding a/b/c/d/y
23 adding a/b/c/d/z
23 adding a/b/c/d/z
24 $ hg mv a z
24 $ hg mv a z
25 moving a/b/c/d/x to z/b/c/d/x
25 moving a/b/c/d/x to z/b/c/d/x
26 moving a/b/c/d/y to z/b/c/d/y
26 moving a/b/c/d/y to z/b/c/d/y
27 moving a/b/c/d/z to z/b/c/d/z
27 moving a/b/c/d/z to z/b/c/d/z
28
28
29 Test name collisions
29 Test name collisions
30
30
31 $ rm z/b/c/d/x
31 $ rm z/b/c/d/x
32 $ mkdir z/b/c/d/x
32 $ mkdir z/b/c/d/x
33 $ touch z/b/c/d/x/y
33 $ touch z/b/c/d/x/y
34 $ hg add z/b/c/d/x/y
34 $ hg add z/b/c/d/x/y
35 abort: file 'z/b/c/d/x' in dirstate clashes with 'z/b/c/d/x/y'
35 abort: file 'z/b/c/d/x' in dirstate clashes with 'z/b/c/d/x/y'
36 [255]
36 [255]
37 $ rm -rf z/b/c/d
37 $ rm -rf z/b/c/d
38 $ touch z/b/c/d
38 $ touch z/b/c/d
39 $ hg add z/b/c/d
39 $ hg add z/b/c/d
40 abort: directory 'z/b/c/d' already in dirstate
40 abort: directory 'z/b/c/d' already in dirstate
41 [255]
41 [255]
42
42
43 $ cd ..
43 $ cd ..
44
44
45 Issue1790: dirstate entry locked into unset if file mtime is set into
45 Issue1790: dirstate entry locked into unset if file mtime is set into
46 the future
46 the future
47
47
48 Prepare test repo:
48 Prepare test repo:
49
49
50 $ hg init u
50 $ hg init u
51 $ cd u
51 $ cd u
52 $ echo a > a
52 $ echo a > a
53 $ hg add
53 $ hg add
54 adding a
54 adding a
55 $ hg ci -m1
55 $ hg ci -m1
56
56
57 Set mtime of a into the future:
57 Set mtime of a into the future:
58
58
59 $ touch -t 203101011200 a
59 $ touch -t 203101011200 a
60
60
61 Status must not set a's entry to unset (issue1790):
61 Status must not set a's entry to unset (issue1790):
62
62
63 $ hg status
63 $ hg status
64 $ hg debugstate
64 $ hg debugstate
65 n 644 2 2031-01-01 12:00:00 a
65 n 644 2 2031-01-01 12:00:00 a
66
66
67 Test modulo storage/comparison of absurd dates:
67 Test modulo storage/comparison of absurd dates:
68
68
69 #if no-aix
69 #if no-aix
70 $ touch -t 195001011200 a
70 $ touch -t 195001011200 a
71 $ hg st
71 $ hg st
72 $ hg debugstate
72 $ hg debugstate
73 n 644 2 2018-01-19 15:14:08 a
73 n 644 2 2018-01-19 15:14:08 a
74 #endif
74 #endif
75
75
76 Verify that exceptions during a dirstate change leave the dirstate
76 Verify that exceptions during a dirstate change leave the dirstate
77 coherent (issue4353)
77 coherent (issue4353)
78
78
79 $ cat > ../dirstateexception.py <<EOF
79 $ cat > ../dirstateexception.py <<EOF
80 > from mercurial import (
80 > from mercurial import (
81 > error,
81 > error,
82 > extensions,
82 > extensions,
83 > mergestate as mergestatemod,
83 > mergestate as mergestatemod,
84 > )
84 > )
85 >
85 >
86 > def wraprecordupdates(*args):
86 > def wraprecordupdates(*args):
87 > raise error.Abort(b"simulated error while recording dirstateupdates")
87 > raise error.Abort(b"simulated error while recording dirstateupdates")
88 >
88 >
89 > def reposetup(ui, repo):
89 > def reposetup(ui, repo):
90 > extensions.wrapfunction(mergestatemod, 'recordupdates',
90 > extensions.wrapfunction(mergestatemod, 'recordupdates',
91 > wraprecordupdates)
91 > wraprecordupdates)
92 > EOF
92 > EOF
93
93
94 $ hg rm a
94 $ hg rm a
95 $ hg commit -m 'rm a'
95 $ hg commit -m 'rm a'
96 $ echo "[extensions]" >> .hg/hgrc
96 $ echo "[extensions]" >> .hg/hgrc
97 $ echo "dirstateex=../dirstateexception.py" >> .hg/hgrc
97 $ echo "dirstateex=../dirstateexception.py" >> .hg/hgrc
98 $ hg up 0
98 $ hg up 0
99 abort: simulated error while recording dirstateupdates
99 abort: simulated error while recording dirstateupdates
100 [255]
100 [255]
101 $ hg log -r . -T '{rev}\n'
101 $ hg log -r . -T '{rev}\n'
102 1
102 1
103 $ hg status
103 $ hg status
104 ? a
104 ? a
105
105
106 #if dirstate-v2
106 #if dirstate-v2
107 Check that folders that are prefixes of others do not throw the packer into an
107 Check that folders that are prefixes of others do not throw the packer into an
108 infinite loop.
108 infinite loop.
109
109
110 $ cd ..
110 $ cd ..
111 $ hg init infinite-loop
111 $ hg init infinite-loop
112 $ cd infinite-loop
112 $ cd infinite-loop
113 $ mkdir hgext3rd hgext
113 $ mkdir hgext3rd hgext
114 $ touch hgext3rd/__init__.py hgext/zeroconf.py
114 $ touch hgext3rd/__init__.py hgext/zeroconf.py
115 $ hg commit -Aqm0
115 $ hg commit -Aqm0
116
116
117 $ hg st -c
117 $ hg st -c
118 C hgext/zeroconf.py
118 C hgext/zeroconf.py
119 C hgext3rd/__init__.py
119 C hgext3rd/__init__.py
120
120
121 $ cd ..
121 $ cd ..
122
122
123 Check that the old dirstate data file is removed correctly and the new one is
123 Check that the old dirstate data file is removed correctly and the new one is
124 valid.
124 valid.
125
125
126 $ dirstate_data_files () {
126 $ dirstate_data_files () {
127 > find .hg -maxdepth 1 -name "dirstate.*"
127 > find .hg -maxdepth 1 -name "dirstate.*"
128 > }
128 > }
129
129
130 $ find_dirstate_uuid () {
130 $ find_dirstate_uuid () {
131 > hg debugstate --docket | grep uuid | sed 's/.*uuid: \(.*\)/\1/'
131 > hg debugstate --docket | grep uuid | sed 's/.*uuid: \(.*\)/\1/'
132 > }
132 > }
133
133
134 $ find_dirstate_data_size () {
134 $ find_dirstate_data_size () {
135 > hg debugstate --docket | grep 'size of dirstate data' | sed 's/.*size of dirstate data: \(.*\)/\1/'
135 > hg debugstate --docket | grep 'size of dirstate data' | sed 's/.*size of dirstate data: \(.*\)/\1/'
136 > }
136 > }
137
137
138 $ dirstate_uuid_has_not_changed () {
138 $ dirstate_uuid_has_not_changed () {
139 > # Non-Rust always rewrites the whole dirstate
139 > # Non-Rust always rewrites the whole dirstate
140 > if [ $# -eq 1 ] || ([ -n "$HGMODULEPOLICY" ] && [ -z "${HGMODULEPOLICY##*rust*}" ]) || [ -n "$RHG_INSTALLED_AS_HG" ]; then
140 > if [ $# -eq 1 ] || ([ -n "$HGMODULEPOLICY" ] && [ -z "${HGMODULEPOLICY##*rust*}" ]) || [ -n "$RHG_INSTALLED_AS_HG" ]; then
141 > test $current_uid = $(find_dirstate_uuid)
141 > test $current_uid = $(find_dirstate_uuid)
142 > else
142 > else
143 > echo "not testing because using Python implementation"
143 > echo "not testing because using Python implementation"
144 > fi
144 > fi
145 > }
145 > }
146
146
147 $ cd ..
147 $ cd ..
148 $ hg init append-mostly
148 $ hg init append-mostly
149 $ cd append-mostly
149 $ cd append-mostly
150 $ mkdir dir dir2
150 $ mkdir dir dir2
151 $ touch dir/a dir/b dir/c dir/d dir/e dir2/f
151 $ touch dir/a dir/b dir/c dir/d dir/e dir2/f
152 $ hg commit -Aqm initial
152 $ hg commit -Aqm initial
153 $ hg st
153 $ hg st
154 $ dirstate_data_files | wc -l
154 $ dirstate_data_files | wc -l
155 *1 (re)
155 *1 (re)
156 $ current_uid=$(find_dirstate_uuid)
156 $ current_uid=$(find_dirstate_uuid)
157
157
158 Nothing changes here
158 Nothing changes here
159
159
160 $ hg st
160 $ hg st
161 $ dirstate_data_files | wc -l
161 $ dirstate_data_files | wc -l
162 *1 (re)
162 *1 (re)
163 $ dirstate_uuid_has_not_changed
163 $ dirstate_uuid_has_not_changed
164 not testing because using Python implementation (no-rust no-rhg !)
164 not testing because using Python implementation (no-rust no-rhg !)
165
165
166 Trigger an append with a small change
166 Trigger an append with a small change
167
167
168 $ current_data_size=$(find_dirstate_data_size)
168 $ current_data_size=$(find_dirstate_data_size)
169 $ rm dir2/f
169 $ rm dir2/f
170 $ hg st
170 $ hg st
171 ! dir2/f
171 ! dir2/f
172 $ dirstate_data_files | wc -l
172 $ dirstate_data_files | wc -l
173 *1 (re)
173 *1 (re)
174 $ dirstate_uuid_has_not_changed
174 $ dirstate_uuid_has_not_changed
175 not testing because using Python implementation (no-rust no-rhg !)
175 not testing because using Python implementation (no-rust no-rhg !)
176 $ new_data_size=$(find_dirstate_data_size)
176 $ new_data_size=$(find_dirstate_data_size)
177 $ [ "$current_data_size" -eq "$new_data_size" ]; echo $?
177 $ [ "$current_data_size" -eq "$new_data_size" ]; echo $?
178 0 (no-rust no-rhg !)
178 0 (no-rust no-rhg !)
179 1 (rust !)
179 1 (rust !)
180 1 (no-rust rhg !)
180 1 (no-rust rhg !)
181
181
182 Unused bytes counter is non-0 when appending
182 Unused bytes counter is non-0 when appending
183 $ touch file
183 $ touch file
184 $ hg add file
184 $ hg add file
185 $ current_uid=$(find_dirstate_uuid)
185 $ current_uid=$(find_dirstate_uuid)
186
186
187 Trigger a rust/rhg run which updates the unused bytes value
187 Trigger a rust/rhg run which updates the unused bytes value
188 $ hg st
188 $ hg st
189 A file
189 A file
190 ! dir2/f
190 ! dir2/f
191 $ dirstate_data_files | wc -l
191 $ dirstate_data_files | wc -l
192 *1 (re)
192 *1 (re)
193 $ dirstate_uuid_has_not_changed
193 $ dirstate_uuid_has_not_changed
194 not testing because using Python implementation (no-rust no-rhg !)
194 not testing because using Python implementation (no-rust no-rhg !)
195
195
196 $ hg debugstate --docket | grep unused
196 $ hg debugstate --docket | grep unused
197 number of unused bytes: 0 (no-rust no-rhg !)
197 number of unused bytes: 0 (no-rust no-rhg !)
198 number of unused bytes: [1-9]\d* (re) (rhg no-rust !)
198 number of unused bytes: [1-9]\d* (re) (rhg no-rust !)
199 number of unused bytes: [1-9]\d* (re) (rust no-rhg !)
199 number of unused bytes: [1-9]\d* (re) (rust no-rhg !)
200 number of unused bytes: [1-9]\d* (re) (rust rhg !)
200 number of unused bytes: [1-9]\d* (re) (rust rhg !)
201
201
202 Delete most of the dirstate to trigger a non-append
202 Delete most of the dirstate to trigger a non-append
203 $ hg rm dir/a dir/b dir/c dir/d
203 $ hg rm dir/a dir/b dir/c dir/d
204 $ dirstate_data_files | wc -l
204 $ dirstate_data_files | wc -l
205 *1 (re)
205 *1 (re)
206 $ dirstate_uuid_has_not_changed also-if-python
206 $ dirstate_uuid_has_not_changed also-if-python
207 [1]
207 [1]
208
208
209 Check that unused bytes counter is reset when creating a new docket
209 Check that unused bytes counter is reset when creating a new docket
210
210
211 $ hg debugstate --docket | grep unused
211 $ hg debugstate --docket | grep unused
212 number of unused bytes: 0
212 number of unused bytes: 0
213
213
214 #endif
214 #endif
215
215
216 Transaction compatibility
216 Transaction compatibility
217 -------------------------
217 -------------------------
218
218
219 The transaction preserves the dirstate.
219 The transaction preserves the dirstate.
220 We should make sure all of it (docket + data) is preserved
220 We should make sure all of it (docket + data) is preserved
221
221
222 #if dirstate-v2
222 #if dirstate-v2
223 $ hg commit -m 'bli'
223 $ hg commit -m 'bli'
224 #endif
224 #endif
225
225
226 $ hg update --quiet
226 $ hg update --quiet
227 $ hg revert --all --quiet
227 $ hg revert --all --quiet
228 $ rm -f a
228 $ rm -f a
229 $ echo foo > foo
229 $ echo foo > foo
230 $ hg add foo
230 $ hg add foo
231 $ hg commit -m foo
231 $ hg commit -m foo
232
232
233 #if dirstate-v2
233 #if dirstate-v2
234 $ uid=$(find_dirstate_uuid)
234 $ uid=$(find_dirstate_uuid)
235 $ touch bar
235 $ touch bar
236 $ while [ uid = $(find_dirstate_uuid) ]; do
236 $ while [ uid = $(find_dirstate_uuid) ]; do
237 > hg add bar;
237 > hg add bar;
238 > hg remove bar;
238 > hg remove bar;
239 > done;
239 > done;
240 $ rm bar
240 $ rm bar
241 #endif
241 #endif
242 $ hg rollback
242 $ hg rollback
243 repository tip rolled back to revision 1 (undo commit)
243 repository tip rolled back to revision 1 (undo commit)
244 working directory now based on revision 1
244 working directory now based on revision 1
245
245
246 $ hg status
246 $ hg status
247 A foo
247 A foo
248 $ cd ..
248 $ cd ..
249
249
250 Check dirstate ordering
250 Check dirstate ordering
251 (e.g. `src/dirstate/` and `src/dirstate.rs` shouldn't cause issues)
251 (e.g. `src/dirstate/` and `src/dirstate.rs` shouldn't cause issues)
252
252
253 $ hg init repro
253 $ hg init repro
254 $ cd repro
254 $ cd repro
255 $ mkdir src
255 $ mkdir src
256 $ mkdir src/dirstate
256 $ mkdir src/dirstate
257 $ touch src/dirstate/file1 src/dirstate/file2 src/dirstate.rs
257 $ touch src/dirstate/file1 src/dirstate/file2 src/dirstate.rs
258 $ touch file1 file2
258 $ touch file1 file2
259 $ hg commit -Aqm1
259 $ hg commit -Aqm1
260 #if rhg no-rust dirstate-v2
261 $ hg st
260 $ hg st
262 ! src/dirstate/file1 (known-bad-output !)
263 ! src/dirstate/file2 (known-bad-output !)
264 ? src/dirstate/file1 (known-bad-output !)
265 ? src/dirstate/file2 (known-bad-output !)
266 expected a value, found none (known-bad-output !)
267 [255]
268 #else
269 $ hg st
270 #endif
271 $ cd ..
261 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now