##// END OF EJS Templates
dirstate-v2: actually use sub-second mtime precision...
Simon Sapin -
r49082:83d0bd45 default
parent child Browse files
Show More
@@ -1,77 +1,87
1 # Copyright Mercurial Contributors
1 # Copyright Mercurial Contributors
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 from __future__ import absolute_import
6 from __future__ import absolute_import
7
7
8 import functools
8 import functools
9 import stat
9 import stat
10
10
11
11
12 rangemask = 0x7FFFFFFF
12 rangemask = 0x7FFFFFFF
13
13
14
14
15 @functools.total_ordering
15 @functools.total_ordering
16 class timestamp(tuple):
16 class timestamp(tuple):
17 """
17 """
18 A Unix timestamp with optional nanoseconds precision,
18 A Unix timestamp with optional nanoseconds precision,
19 modulo 2**31 seconds.
19 modulo 2**31 seconds.
20
20
21 A 2-tuple containing:
21 A 2-tuple containing:
22
22
23 `truncated_seconds`: seconds since the Unix epoch,
23 `truncated_seconds`: seconds since the Unix epoch,
24 truncated to its lower 31 bits
24 truncated to its lower 31 bits
25
25
26 `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`.
26 `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`.
27 When this is zero, the sub-second precision is considered unknown.
27 When this is zero, the sub-second precision is considered unknown.
28 """
28 """
29
29
30 def __new__(cls, value):
30 def __new__(cls, value):
31 truncated_seconds, subsec_nanos = value
31 truncated_seconds, subsec_nanos = value
32 value = (truncated_seconds & rangemask, subsec_nanos)
32 value = (truncated_seconds & rangemask, subsec_nanos)
33 return super(timestamp, cls).__new__(cls, value)
33 return super(timestamp, cls).__new__(cls, value)
34
34
35 def __eq__(self, other):
35 def __eq__(self, other):
36 self_secs, self_subsec_nanos = self
36 self_secs, self_subsec_nanos = self
37 other_secs, other_subsec_nanos = other
37 other_secs, other_subsec_nanos = other
38 return self_secs == other_secs and (
38 return self_secs == other_secs and (
39 self_subsec_nanos == other_subsec_nanos
39 self_subsec_nanos == other_subsec_nanos
40 or self_subsec_nanos == 0
40 or self_subsec_nanos == 0
41 or other_subsec_nanos == 0
41 or other_subsec_nanos == 0
42 )
42 )
43
43
44 def __gt__(self, other):
44 def __gt__(self, other):
45 self_secs, self_subsec_nanos = self
45 self_secs, self_subsec_nanos = self
46 other_secs, other_subsec_nanos = other
46 other_secs, other_subsec_nanos = other
47 if self_secs > other_secs:
47 if self_secs > other_secs:
48 return True
48 return True
49 if self_secs < other_secs:
49 if self_secs < other_secs:
50 return False
50 return False
51 if self_subsec_nanos == 0 or other_subsec_nanos == 0:
51 if self_subsec_nanos == 0 or other_subsec_nanos == 0:
52 # they are considered equal, so not "greater than"
52 # they are considered equal, so not "greater than"
53 return False
53 return False
54 return self_subsec_nanos > other_subsec_nanos
54 return self_subsec_nanos > other_subsec_nanos
55
55
56
56
57 def zero():
57 def zero():
58 """
58 """
59 Returns the `timestamp` at the Unix epoch.
59 Returns the `timestamp` at the Unix epoch.
60 """
60 """
61 return tuple.__new__(timestamp, (0, 0))
61 return tuple.__new__(timestamp, (0, 0))
62
62
63
63
64 def mtime_of(stat_result):
64 def mtime_of(stat_result):
65 """
65 """
66 Takes an `os.stat_result`-like object and returns a `timestamp` object
66 Takes an `os.stat_result`-like object and returns a `timestamp` object
67 for its modification time.
67 for its modification time.
68 """
68 """
69 # https://docs.python.org/2/library/os.html#os.stat_float_times
69 try:
70 # "For compatibility with older Python versions,
70 # TODO: add this attribute to `osutil.stat` objects,
71 # accessing stat_result as a tuple always returns integers."
71 # see `mercurial/cext/osutil.c`.
72 secs = stat_result[stat.ST_MTIME]
72 #
73 # This attribute is also not available on Python 2.
74 nanos = stat_result.st_mtime_ns
75 except AttributeError:
76 # https://docs.python.org/2/library/os.html#os.stat_float_times
77 # "For compatibility with older Python versions,
78 # accessing stat_result as a tuple always returns integers."
79 secs = stat_result[stat.ST_MTIME]
73
80
74 # For now
81 subsec_nanos = 0
75 subsec_nanos = 0
82 else:
83 billion = int(1e9)
84 secs = nanos // billion
85 subsec_nanos = nanos % billion
76
86
77 return timestamp((secs, subsec_nanos))
87 return timestamp((secs, subsec_nanos))
@@ -1,414 +1,411
1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 #
2 #
3 # Copyright Mercurial Contributors
3 # Copyright Mercurial Contributors
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from ..thirdparty import attr
12 from ..thirdparty import attr
13 from .. import error, policy
13 from .. import error, policy
14
14
15 parsers = policy.importmod('parsers')
15 parsers = policy.importmod('parsers')
16
16
17
17
18 # Must match the constant of the same name in
18 # Must match the constant of the same name in
19 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
20 TREE_METADATA_SIZE = 44
20 TREE_METADATA_SIZE = 44
21 NODE_SIZE = 44
21 NODE_SIZE = 44
22
22
23
23
24 # Must match the `TreeMetadata` Rust struct in
24 # Must match the `TreeMetadata` Rust struct in
25 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
25 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
26 #
26 #
27 # * 4 bytes: start offset of root nodes
27 # * 4 bytes: start offset of root nodes
28 # * 4 bytes: number of root nodes
28 # * 4 bytes: number of root nodes
29 # * 4 bytes: total number of nodes in the tree that have an entry
29 # * 4 bytes: total number of nodes in the tree that have an entry
30 # * 4 bytes: total number of nodes in the tree that have a copy source
30 # * 4 bytes: total number of nodes in the tree that have a copy source
31 # * 4 bytes: number of bytes in the data file that are not used anymore
31 # * 4 bytes: number of bytes in the data file that are not used anymore
32 # * 4 bytes: unused
32 # * 4 bytes: unused
33 # * 20 bytes: SHA-1 hash of ignore patterns
33 # * 20 bytes: SHA-1 hash of ignore patterns
34 TREE_METADATA = struct.Struct('>LLLLL4s20s')
34 TREE_METADATA = struct.Struct('>LLLLL4s20s')
35
35
36
36
37 # Must match the `Node` Rust struct in
37 # Must match the `Node` Rust struct in
38 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
38 # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
39 #
39 #
40 # * 4 bytes: start offset of full path
40 # * 4 bytes: start offset of full path
41 # * 2 bytes: length of the full path
41 # * 2 bytes: length of the full path
42 # * 2 bytes: length within the full path before its "base name"
42 # * 2 bytes: length within the full path before its "base name"
43 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
43 # * 4 bytes: start offset of the copy source if any, or zero for no copy source
44 # * 2 bytes: length of the copy source if any, or unused
44 # * 2 bytes: length of the copy source if any, or unused
45 # * 4 bytes: start offset of child nodes
45 # * 4 bytes: start offset of child nodes
46 # * 4 bytes: number of child nodes
46 # * 4 bytes: number of child nodes
47 # * 4 bytes: number of descendant nodes that have an entry
47 # * 4 bytes: number of descendant nodes that have an entry
48 # * 4 bytes: number of descendant nodes that have a "tracked" state
48 # * 4 bytes: number of descendant nodes that have a "tracked" state
49 # * 1 byte: flags
49 # * 1 byte: flags
50 # * 4 bytes: expected size
50 # * 4 bytes: expected size
51 # * 4 bytes: mtime seconds
51 # * 4 bytes: mtime seconds
52 # * 4 bytes: mtime nanoseconds
52 # * 4 bytes: mtime nanoseconds
53 NODE = struct.Struct('>LHHLHLLLLHlll')
53 NODE = struct.Struct('>LHHLHLLLLHlll')
54
54
55
55
56 assert TREE_METADATA_SIZE == TREE_METADATA.size
56 assert TREE_METADATA_SIZE == TREE_METADATA.size
57 assert NODE_SIZE == NODE.size
57 assert NODE_SIZE == NODE.size
58
58
59
59
60 def parse_dirstate(map, copy_map, data, tree_metadata):
60 def parse_dirstate(map, copy_map, data, tree_metadata):
61 """parse a full v2-dirstate from a binary data into dictionnaries:
61 """parse a full v2-dirstate from a binary data into dictionnaries:
62
62
63 - map: a {path: entry} mapping that will be filled
63 - map: a {path: entry} mapping that will be filled
64 - copy_map: a {path: copy-source} mapping that will be filled
64 - copy_map: a {path: copy-source} mapping that will be filled
65 - data: a binary blob contains v2 nodes data
65 - data: a binary blob contains v2 nodes data
66 - tree_metadata:: a binary blob of the top level node (from the docket)
66 - tree_metadata:: a binary blob of the top level node (from the docket)
67 """
67 """
68 (
68 (
69 root_nodes_start,
69 root_nodes_start,
70 root_nodes_len,
70 root_nodes_len,
71 _nodes_with_entry_count,
71 _nodes_with_entry_count,
72 _nodes_with_copy_source_count,
72 _nodes_with_copy_source_count,
73 _unreachable_bytes,
73 _unreachable_bytes,
74 _unused,
74 _unused,
75 _ignore_patterns_hash,
75 _ignore_patterns_hash,
76 ) = TREE_METADATA.unpack(tree_metadata)
76 ) = TREE_METADATA.unpack(tree_metadata)
77 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
77 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
78
78
79
79
80 def parse_nodes(map, copy_map, data, start, len):
80 def parse_nodes(map, copy_map, data, start, len):
81 """parse <len> nodes from <data> starting at offset <start>
81 """parse <len> nodes from <data> starting at offset <start>
82
82
83 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
83 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
84
84
85 All directory specific information is ignored and do not need any
85 All directory specific information is ignored and do not need any
86 processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
86 processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
87 """
87 """
88 for i in range(len):
88 for i in range(len):
89 node_start = start + NODE_SIZE * i
89 node_start = start + NODE_SIZE * i
90 node_bytes = slice_with_len(data, node_start, NODE_SIZE)
90 node_bytes = slice_with_len(data, node_start, NODE_SIZE)
91 (
91 (
92 path_start,
92 path_start,
93 path_len,
93 path_len,
94 _basename_start,
94 _basename_start,
95 copy_source_start,
95 copy_source_start,
96 copy_source_len,
96 copy_source_len,
97 children_start,
97 children_start,
98 children_count,
98 children_count,
99 _descendants_with_entry_count,
99 _descendants_with_entry_count,
100 _tracked_descendants_count,
100 _tracked_descendants_count,
101 flags,
101 flags,
102 size,
102 size,
103 mtime_s,
103 mtime_s,
104 _mtime_ns,
104 mtime_ns,
105 ) = NODE.unpack(node_bytes)
105 ) = NODE.unpack(node_bytes)
106
106
107 # Parse child nodes of this node recursively
107 # Parse child nodes of this node recursively
108 parse_nodes(map, copy_map, data, children_start, children_count)
108 parse_nodes(map, copy_map, data, children_start, children_count)
109
109
110 # Don’t yet use sub-second precision if it exists in the file,
111 # since other parts of the code still set it to zero.
112 mtime_ns = 0
113 item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
110 item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
114 if not item.any_tracked:
111 if not item.any_tracked:
115 continue
112 continue
116 path = slice_with_len(data, path_start, path_len)
113 path = slice_with_len(data, path_start, path_len)
117 map[path] = item
114 map[path] = item
118 if copy_source_start:
115 if copy_source_start:
119 copy_map[path] = slice_with_len(
116 copy_map[path] = slice_with_len(
120 data, copy_source_start, copy_source_len
117 data, copy_source_start, copy_source_len
121 )
118 )
122
119
123
120
124 def slice_with_len(data, start, len):
121 def slice_with_len(data, start, len):
125 return data[start : start + len]
122 return data[start : start + len]
126
123
127
124
128 @attr.s
125 @attr.s
129 class Node(object):
126 class Node(object):
130 path = attr.ib()
127 path = attr.ib()
131 entry = attr.ib()
128 entry = attr.ib()
132 parent = attr.ib(default=None)
129 parent = attr.ib(default=None)
133 children_count = attr.ib(default=0)
130 children_count = attr.ib(default=0)
134 children_offset = attr.ib(default=0)
131 children_offset = attr.ib(default=0)
135 descendants_with_entry = attr.ib(default=0)
132 descendants_with_entry = attr.ib(default=0)
136 tracked_descendants = attr.ib(default=0)
133 tracked_descendants = attr.ib(default=0)
137
134
138 def pack(self, copy_map, paths_offset):
135 def pack(self, copy_map, paths_offset):
139 path = self.path
136 path = self.path
140 copy = copy_map.get(path)
137 copy = copy_map.get(path)
141 entry = self.entry
138 entry = self.entry
142
139
143 path_start = paths_offset
140 path_start = paths_offset
144 path_len = len(path)
141 path_len = len(path)
145 basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1
142 basename_start = path.rfind(b'/') + 1 # 0 if rfind returns -1
146 if copy is not None:
143 if copy is not None:
147 copy_source_start = paths_offset + len(path)
144 copy_source_start = paths_offset + len(path)
148 copy_source_len = len(copy)
145 copy_source_len = len(copy)
149 else:
146 else:
150 copy_source_start = 0
147 copy_source_start = 0
151 copy_source_len = 0
148 copy_source_len = 0
152 if entry is not None:
149 if entry is not None:
153 flags, size, mtime_s, mtime_ns = entry.v2_data()
150 flags, size, mtime_s, mtime_ns = entry.v2_data()
154 else:
151 else:
155 # There are no mtime-cached directories in the Python implementation
152 # There are no mtime-cached directories in the Python implementation
156 flags = 0
153 flags = 0
157 size = 0
154 size = 0
158 mtime_s = 0
155 mtime_s = 0
159 mtime_ns = 0
156 mtime_ns = 0
160 return NODE.pack(
157 return NODE.pack(
161 path_start,
158 path_start,
162 path_len,
159 path_len,
163 basename_start,
160 basename_start,
164 copy_source_start,
161 copy_source_start,
165 copy_source_len,
162 copy_source_len,
166 self.children_offset,
163 self.children_offset,
167 self.children_count,
164 self.children_count,
168 self.descendants_with_entry,
165 self.descendants_with_entry,
169 self.tracked_descendants,
166 self.tracked_descendants,
170 flags,
167 flags,
171 size,
168 size,
172 mtime_s,
169 mtime_s,
173 mtime_ns,
170 mtime_ns,
174 )
171 )
175
172
176
173
177 def pack_dirstate(map, copy_map, now):
174 def pack_dirstate(map, copy_map, now):
178 """
175 """
179 Pack `map` and `copy_map` into the dirstate v2 binary format and return
176 Pack `map` and `copy_map` into the dirstate v2 binary format and return
180 the bytearray.
177 the bytearray.
181 `now` is a timestamp of the current filesystem time used to detect race
178 `now` is a timestamp of the current filesystem time used to detect race
182 conditions in writing the dirstate to disk, see inline comment.
179 conditions in writing the dirstate to disk, see inline comment.
183
180
184 The on-disk format expects a tree-like structure where the leaves are
181 The on-disk format expects a tree-like structure where the leaves are
185 written first (and sorted per-directory), going up levels until the root
182 written first (and sorted per-directory), going up levels until the root
186 node and writing that one to the docket. See more details on the on-disk
183 node and writing that one to the docket. See more details on the on-disk
187 format in `mercurial/helptext/internals/dirstate-v2`.
184 format in `mercurial/helptext/internals/dirstate-v2`.
188
185
189 Since both `map` and `copy_map` are flat dicts we need to figure out the
186 Since both `map` and `copy_map` are flat dicts we need to figure out the
190 hierarchy. This algorithm does so without having to build the entire tree
187 hierarchy. This algorithm does so without having to build the entire tree
191 in-memory: it only keeps the minimum number of nodes around to satisfy the
188 in-memory: it only keeps the minimum number of nodes around to satisfy the
192 format.
189 format.
193
190
194 # Algorithm explanation
191 # Algorithm explanation
195
192
196 This explanation does not talk about the different counters for tracked
193 This explanation does not talk about the different counters for tracked
197 descendents and storing the copies, but that work is pretty simple once this
194 descendents and storing the copies, but that work is pretty simple once this
198 algorithm is in place.
195 algorithm is in place.
199
196
200 ## Building a subtree
197 ## Building a subtree
201
198
202 First, sort `map`: this makes it so the leaves of the tree are contiguous
199 First, sort `map`: this makes it so the leaves of the tree are contiguous
203 per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
200 per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
204 and enables us to use the ordering of folders to have a "cursor" of the
201 and enables us to use the ordering of folders to have a "cursor" of the
205 current folder we're in without ever going twice in the same branch of the
202 current folder we're in without ever going twice in the same branch of the
206 tree. The cursor is a node that remembers its parent and any information
203 tree. The cursor is a node that remembers its parent and any information
207 relevant to the format (see the `Node` class), building the relevant part
204 relevant to the format (see the `Node` class), building the relevant part
208 of the tree lazily.
205 of the tree lazily.
209 Then, for each file in `map`, move the cursor into the tree to the
206 Then, for each file in `map`, move the cursor into the tree to the
210 corresponding folder of the file: for example, if the very first file
207 corresponding folder of the file: for example, if the very first file
211 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
208 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
212 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
209 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
213 `Node["a"]`. These nodes are kept around in a stack.
210 `Node["a"]`. These nodes are kept around in a stack.
214 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
211 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
215 add it to the stack and keep looping with the same logic of creating the
212 add it to the stack and keep looping with the same logic of creating the
216 tree nodes as needed. If however the next file in `map` is *not* in the same
213 tree nodes as needed. If however the next file in `map` is *not* in the same
217 subtree ("a/other", if we're still in the "a/b" folder), then we know that
214 subtree ("a/other", if we're still in the "a/b" folder), then we know that
218 the subtree we're in is complete.
215 the subtree we're in is complete.
219
216
220 ## Writing the subtree
217 ## Writing the subtree
221
218
222 We have the entire subtree in the stack, so we start writing it to disk
219 We have the entire subtree in the stack, so we start writing it to disk
223 folder by folder. The way we write a folder is to pop the stack into a list
220 folder by folder. The way we write a folder is to pop the stack into a list
224 until the folder changes, revert this list of direct children (to satisfy
221 until the folder changes, revert this list of direct children (to satisfy
225 the format requirement that children be sorted). This process repeats until
222 the format requirement that children be sorted). This process repeats until
226 we hit the "other" subtree.
223 we hit the "other" subtree.
227
224
228 An example:
225 An example:
229 a
226 a
230 dir1/b
227 dir1/b
231 dir1/c
228 dir1/c
232 dir2/dir3/d
229 dir2/dir3/d
233 dir2/dir3/e
230 dir2/dir3/e
234 dir2/f
231 dir2/f
235
232
236 Would have us:
233 Would have us:
237 - add to the stack until "dir2/dir3/e"
234 - add to the stack until "dir2/dir3/e"
238 - realize that "dir2/f" is in a different subtree
235 - realize that "dir2/f" is in a different subtree
239 - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
236 - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
240 pack them since the next entry is "dir2/dir3"
237 pack them since the next entry is "dir2/dir3"
241 - go back up to "dir2"
238 - go back up to "dir2"
242 - add "dir2/f" to the stack
239 - add "dir2/f" to the stack
243 - realize we're done with the map
240 - realize we're done with the map
244 - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
241 - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
245 - go up to the root node, do the same to write "a", "dir1" and "dir2" in
242 - go up to the root node, do the same to write "a", "dir1" and "dir2" in
246 that order
243 that order
247
244
248 ## Special case for the root node
245 ## Special case for the root node
249
246
250 The root node is not serialized in the format, but its information is
247 The root node is not serialized in the format, but its information is
251 written to the docket. Again, see more details on the on-disk format in
248 written to the docket. Again, see more details on the on-disk format in
252 `mercurial/helptext/internals/dirstate-v2`.
249 `mercurial/helptext/internals/dirstate-v2`.
253 """
250 """
254 data = bytearray()
251 data = bytearray()
255 root_nodes_start = 0
252 root_nodes_start = 0
256 root_nodes_len = 0
253 root_nodes_len = 0
257 nodes_with_entry_count = 0
254 nodes_with_entry_count = 0
258 nodes_with_copy_source_count = 0
255 nodes_with_copy_source_count = 0
259 # Will always be 0 since this implementation always re-writes everything
256 # Will always be 0 since this implementation always re-writes everything
260 # to disk
257 # to disk
261 unreachable_bytes = 0
258 unreachable_bytes = 0
262 unused = b'\x00' * 4
259 unused = b'\x00' * 4
263 # This is an optimization that's only useful for the Rust implementation
260 # This is an optimization that's only useful for the Rust implementation
264 ignore_patterns_hash = b'\x00' * 20
261 ignore_patterns_hash = b'\x00' * 20
265
262
266 if len(map) == 0:
263 if len(map) == 0:
267 tree_metadata = TREE_METADATA.pack(
264 tree_metadata = TREE_METADATA.pack(
268 root_nodes_start,
265 root_nodes_start,
269 root_nodes_len,
266 root_nodes_len,
270 nodes_with_entry_count,
267 nodes_with_entry_count,
271 nodes_with_copy_source_count,
268 nodes_with_copy_source_count,
272 unreachable_bytes,
269 unreachable_bytes,
273 unused,
270 unused,
274 ignore_patterns_hash,
271 ignore_patterns_hash,
275 )
272 )
276 return data, tree_metadata
273 return data, tree_metadata
277
274
278 sorted_map = sorted(map.items(), key=lambda x: x[0])
275 sorted_map = sorted(map.items(), key=lambda x: x[0])
279
276
280 # Use a stack to not have to only remember the nodes we currently need
277 # Use a stack to not have to only remember the nodes we currently need
281 # instead of building the entire tree in memory
278 # instead of building the entire tree in memory
282 stack = []
279 stack = []
283 current_node = Node(b"", None)
280 current_node = Node(b"", None)
284 stack.append(current_node)
281 stack.append(current_node)
285
282
286 for index, (path, entry) in enumerate(sorted_map, 1):
283 for index, (path, entry) in enumerate(sorted_map, 1):
287 if entry.need_delay(now):
284 if entry.need_delay(now):
288 # The file was last modified "simultaneously" with the current
285 # The file was last modified "simultaneously" with the current
289 # write to dirstate (i.e. within the same second for file-
286 # write to dirstate (i.e. within the same second for file-
290 # systems with a granularity of 1 sec). This commonly happens
287 # systems with a granularity of 1 sec). This commonly happens
291 # for at least a couple of files on 'update'.
288 # for at least a couple of files on 'update'.
292 # The user could change the file without changing its size
289 # The user could change the file without changing its size
293 # within the same second. Invalidate the file's mtime in
290 # within the same second. Invalidate the file's mtime in
294 # dirstate, forcing future 'status' calls to compare the
291 # dirstate, forcing future 'status' calls to compare the
295 # contents of the file if the size is the same. This prevents
292 # contents of the file if the size is the same. This prevents
296 # mistakenly treating such files as clean.
293 # mistakenly treating such files as clean.
297 entry.set_possibly_dirty()
294 entry.set_possibly_dirty()
298 nodes_with_entry_count += 1
295 nodes_with_entry_count += 1
299 if path in copy_map:
296 if path in copy_map:
300 nodes_with_copy_source_count += 1
297 nodes_with_copy_source_count += 1
301 current_folder = get_folder(path)
298 current_folder = get_folder(path)
302 current_node = move_to_correct_node_in_tree(
299 current_node = move_to_correct_node_in_tree(
303 current_folder, current_node, stack
300 current_folder, current_node, stack
304 )
301 )
305
302
306 current_node.children_count += 1
303 current_node.children_count += 1
307 # Entries from `map` are never `None`
304 # Entries from `map` are never `None`
308 if entry.tracked:
305 if entry.tracked:
309 current_node.tracked_descendants += 1
306 current_node.tracked_descendants += 1
310 current_node.descendants_with_entry += 1
307 current_node.descendants_with_entry += 1
311 stack.append(Node(path, entry, current_node))
308 stack.append(Node(path, entry, current_node))
312
309
313 should_pack = True
310 should_pack = True
314 next_path = None
311 next_path = None
315 if index < len(sorted_map):
312 if index < len(sorted_map):
316 # Determine if the next entry is in the same sub-tree, if so don't
313 # Determine if the next entry is in the same sub-tree, if so don't
317 # pack yet
314 # pack yet
318 next_path = sorted_map[index][0]
315 next_path = sorted_map[index][0]
319 should_pack = not get_folder(next_path).startswith(current_folder)
316 should_pack = not get_folder(next_path).startswith(current_folder)
320 if should_pack:
317 if should_pack:
321 pack_directory_children(current_node, copy_map, data, stack)
318 pack_directory_children(current_node, copy_map, data, stack)
322 while stack and current_node.path != b"":
319 while stack and current_node.path != b"":
323 # Go up the tree and write until we reach the folder of the next
320 # Go up the tree and write until we reach the folder of the next
324 # entry (if any, otherwise the root)
321 # entry (if any, otherwise the root)
325 parent = current_node.parent
322 parent = current_node.parent
326 in_parent_folder_of_next_entry = next_path is not None and (
323 in_parent_folder_of_next_entry = next_path is not None and (
327 get_folder(next_path).startswith(get_folder(stack[-1].path))
324 get_folder(next_path).startswith(get_folder(stack[-1].path))
328 )
325 )
329 if parent is None or in_parent_folder_of_next_entry:
326 if parent is None or in_parent_folder_of_next_entry:
330 break
327 break
331 pack_directory_children(parent, copy_map, data, stack)
328 pack_directory_children(parent, copy_map, data, stack)
332 current_node = parent
329 current_node = parent
333
330
334 # Special case for the root node since we don't write it to disk, only its
331 # Special case for the root node since we don't write it to disk, only its
335 # children to the docket
332 # children to the docket
336 current_node = stack.pop()
333 current_node = stack.pop()
337 assert current_node.path == b"", current_node.path
334 assert current_node.path == b"", current_node.path
338 assert len(stack) == 0, len(stack)
335 assert len(stack) == 0, len(stack)
339
336
340 tree_metadata = TREE_METADATA.pack(
337 tree_metadata = TREE_METADATA.pack(
341 current_node.children_offset,
338 current_node.children_offset,
342 current_node.children_count,
339 current_node.children_count,
343 nodes_with_entry_count,
340 nodes_with_entry_count,
344 nodes_with_copy_source_count,
341 nodes_with_copy_source_count,
345 unreachable_bytes,
342 unreachable_bytes,
346 unused,
343 unused,
347 ignore_patterns_hash,
344 ignore_patterns_hash,
348 )
345 )
349
346
350 return data, tree_metadata
347 return data, tree_metadata
351
348
352
349
353 def get_folder(path):
350 def get_folder(path):
354 """
351 """
355 Return the folder of the path that's given, an empty string for root paths.
352 Return the folder of the path that's given, an empty string for root paths.
356 """
353 """
357 return path.rsplit(b'/', 1)[0] if b'/' in path else b''
354 return path.rsplit(b'/', 1)[0] if b'/' in path else b''
358
355
359
356
360 def move_to_correct_node_in_tree(target_folder, current_node, stack):
357 def move_to_correct_node_in_tree(target_folder, current_node, stack):
361 """
358 """
362 Move inside the dirstate node tree to the node corresponding to
359 Move inside the dirstate node tree to the node corresponding to
363 `target_folder`, creating the missing nodes along the way if needed.
360 `target_folder`, creating the missing nodes along the way if needed.
364 """
361 """
365 while target_folder != current_node.path:
362 while target_folder != current_node.path:
366 if target_folder.startswith(current_node.path):
363 if target_folder.startswith(current_node.path):
367 # We need to go down a folder
364 # We need to go down a folder
368 prefix = target_folder[len(current_node.path) :].lstrip(b'/')
365 prefix = target_folder[len(current_node.path) :].lstrip(b'/')
369 subfolder_name = prefix.split(b'/', 1)[0]
366 subfolder_name = prefix.split(b'/', 1)[0]
370 if current_node.path:
367 if current_node.path:
371 subfolder_path = current_node.path + b'/' + subfolder_name
368 subfolder_path = current_node.path + b'/' + subfolder_name
372 else:
369 else:
373 subfolder_path = subfolder_name
370 subfolder_path = subfolder_name
374 next_node = stack[-1]
371 next_node = stack[-1]
375 if next_node.path == target_folder:
372 if next_node.path == target_folder:
376 # This folder is now a file and only contains removed entries
373 # This folder is now a file and only contains removed entries
377 # merge with the last node
374 # merge with the last node
378 current_node = next_node
375 current_node = next_node
379 else:
376 else:
380 current_node.children_count += 1
377 current_node.children_count += 1
381 current_node = Node(subfolder_path, None, current_node)
378 current_node = Node(subfolder_path, None, current_node)
382 stack.append(current_node)
379 stack.append(current_node)
383 else:
380 else:
384 # We need to go up a folder
381 # We need to go up a folder
385 current_node = current_node.parent
382 current_node = current_node.parent
386 return current_node
383 return current_node
387
384
388
385
389 def pack_directory_children(node, copy_map, data, stack):
386 def pack_directory_children(node, copy_map, data, stack):
390 """
387 """
391 Write the binary representation of the direct sorted children of `node` to
388 Write the binary representation of the direct sorted children of `node` to
392 `data`
389 `data`
393 """
390 """
394 direct_children = []
391 direct_children = []
395
392
396 while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
393 while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
397 direct_children.append(stack.pop())
394 direct_children.append(stack.pop())
398 if not direct_children:
395 if not direct_children:
399 raise error.ProgrammingError(b"no direct children for %r" % node.path)
396 raise error.ProgrammingError(b"no direct children for %r" % node.path)
400
397
401 # Reverse the stack to get the correct sorted order
398 # Reverse the stack to get the correct sorted order
402 direct_children.reverse()
399 direct_children.reverse()
403 packed_children = bytearray()
400 packed_children = bytearray()
404 # Write the paths to `data`. Pack child nodes but don't write them yet
401 # Write the paths to `data`. Pack child nodes but don't write them yet
405 for child in direct_children:
402 for child in direct_children:
406 packed = child.pack(copy_map=copy_map, paths_offset=len(data))
403 packed = child.pack(copy_map=copy_map, paths_offset=len(data))
407 packed_children.extend(packed)
404 packed_children.extend(packed)
408 data.extend(child.path)
405 data.extend(child.path)
409 data.extend(copy_map.get(child.path, b""))
406 data.extend(copy_map.get(child.path, b""))
410 node.tracked_descendants += child.tracked_descendants
407 node.tracked_descendants += child.tracked_descendants
411 node.descendants_with_entry += child.descendants_with_entry
408 node.descendants_with_entry += child.descendants_with_entry
412 # Write the fixed-size child nodes all together
409 # Write the fixed-size child nodes all together
413 node.children_offset = len(data)
410 node.children_offset = len(data)
414 data.extend(packed_children)
411 data.extend(packed_children)
@@ -1,648 +1,643
1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
2 use crate::errors::HgError;
2 use crate::errors::HgError;
3 use bitflags::bitflags;
3 use bitflags::bitflags;
4 use std::convert::{TryFrom, TryInto};
4 use std::convert::{TryFrom, TryInto};
5 use std::fs;
5 use std::fs;
6 use std::io;
6 use std::io;
7 use std::time::{SystemTime, UNIX_EPOCH};
7 use std::time::{SystemTime, UNIX_EPOCH};
8
8
9 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
9 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
10 pub enum EntryState {
10 pub enum EntryState {
11 Normal,
11 Normal,
12 Added,
12 Added,
13 Removed,
13 Removed,
14 Merged,
14 Merged,
15 }
15 }
16
16
17 /// `size` and `mtime.seconds` are truncated to 31 bits.
17 /// `size` and `mtime.seconds` are truncated to 31 bits.
18 ///
18 ///
19 /// TODO: double-check status algorithm correctness for files
19 /// TODO: double-check status algorithm correctness for files
20 /// larger than 2 GiB or modified after 2038.
20 /// larger than 2 GiB or modified after 2038.
21 #[derive(Debug, Copy, Clone)]
21 #[derive(Debug, Copy, Clone)]
22 pub struct DirstateEntry {
22 pub struct DirstateEntry {
23 pub(crate) flags: Flags,
23 pub(crate) flags: Flags,
24 mode_size: Option<(u32, u32)>,
24 mode_size: Option<(u32, u32)>,
25 mtime: Option<TruncatedTimestamp>,
25 mtime: Option<TruncatedTimestamp>,
26 }
26 }
27
27
28 bitflags! {
28 bitflags! {
29 pub(crate) struct Flags: u8 {
29 pub(crate) struct Flags: u8 {
30 const WDIR_TRACKED = 1 << 0;
30 const WDIR_TRACKED = 1 << 0;
31 const P1_TRACKED = 1 << 1;
31 const P1_TRACKED = 1 << 1;
32 const P2_INFO = 1 << 2;
32 const P2_INFO = 1 << 2;
33 const HAS_FALLBACK_EXEC = 1 << 3;
33 const HAS_FALLBACK_EXEC = 1 << 3;
34 const FALLBACK_EXEC = 1 << 4;
34 const FALLBACK_EXEC = 1 << 4;
35 const HAS_FALLBACK_SYMLINK = 1 << 5;
35 const HAS_FALLBACK_SYMLINK = 1 << 5;
36 const FALLBACK_SYMLINK = 1 << 6;
36 const FALLBACK_SYMLINK = 1 << 6;
37 }
37 }
38 }
38 }
39
39
40 /// A Unix timestamp with nanoseconds precision
40 /// A Unix timestamp with nanoseconds precision
41 #[derive(Debug, Copy, Clone)]
41 #[derive(Debug, Copy, Clone)]
42 pub struct TruncatedTimestamp {
42 pub struct TruncatedTimestamp {
43 truncated_seconds: u32,
43 truncated_seconds: u32,
44 /// Always in the `0 .. 1_000_000_000` range.
44 /// Always in the `0 .. 1_000_000_000` range.
45 nanoseconds: u32,
45 nanoseconds: u32,
46 }
46 }
47
47
48 impl TruncatedTimestamp {
48 impl TruncatedTimestamp {
49 /// Constructs from a timestamp potentially outside of the supported range,
49 /// Constructs from a timestamp potentially outside of the supported range,
50 /// and truncate the seconds components to its lower 31 bits.
50 /// and truncate the seconds components to its lower 31 bits.
51 ///
51 ///
52 /// Panics if the nanoseconds components is not in the expected range.
52 /// Panics if the nanoseconds components is not in the expected range.
53 pub fn new_truncate(seconds: i64, nanoseconds: u32) -> Self {
53 pub fn new_truncate(seconds: i64, nanoseconds: u32) -> Self {
54 assert!(nanoseconds < NSEC_PER_SEC);
54 assert!(nanoseconds < NSEC_PER_SEC);
55 Self {
55 Self {
56 truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
56 truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
57 nanoseconds,
57 nanoseconds,
58 }
58 }
59 }
59 }
60
60
61 /// Construct from components. Returns an error if they are not in the
61 /// Construct from components. Returns an error if they are not in the
62 /// expcted range.
62 /// expcted range.
63 pub fn from_already_truncated(
63 pub fn from_already_truncated(
64 truncated_seconds: u32,
64 truncated_seconds: u32,
65 nanoseconds: u32,
65 nanoseconds: u32,
66 ) -> Result<Self, DirstateV2ParseError> {
66 ) -> Result<Self, DirstateV2ParseError> {
67 if truncated_seconds & !RANGE_MASK_31BIT == 0
67 if truncated_seconds & !RANGE_MASK_31BIT == 0
68 && nanoseconds < NSEC_PER_SEC
68 && nanoseconds < NSEC_PER_SEC
69 {
69 {
70 Ok(Self {
70 Ok(Self {
71 truncated_seconds,
71 truncated_seconds,
72 nanoseconds,
72 nanoseconds,
73 })
73 })
74 } else {
74 } else {
75 Err(DirstateV2ParseError)
75 Err(DirstateV2ParseError)
76 }
76 }
77 }
77 }
78
78
79 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
79 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
80 #[cfg(unix)]
80 #[cfg(unix)]
81 {
81 {
82 use std::os::unix::fs::MetadataExt;
82 use std::os::unix::fs::MetadataExt;
83 let seconds = metadata.mtime();
83 let seconds = metadata.mtime();
84 // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
84 // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
85 let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
85 let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
86 Ok(Self::new_truncate(seconds, nanoseconds))
86 Ok(Self::new_truncate(seconds, nanoseconds))
87 }
87 }
88 #[cfg(not(unix))]
88 #[cfg(not(unix))]
89 {
89 {
90 metadata.modified().map(Self::from)
90 metadata.modified().map(Self::from)
91 }
91 }
92 }
92 }
93
93
94 pub fn to_integer_second(mut self) -> Self {
95 self.nanoseconds = 0;
96 self
97 }
98
99 /// The lower 31 bits of the number of seconds since the epoch.
94 /// The lower 31 bits of the number of seconds since the epoch.
100 pub fn truncated_seconds(&self) -> u32 {
95 pub fn truncated_seconds(&self) -> u32 {
101 self.truncated_seconds
96 self.truncated_seconds
102 }
97 }
103
98
104 /// The sub-second component of this timestamp, in nanoseconds.
99 /// The sub-second component of this timestamp, in nanoseconds.
105 /// Always in the `0 .. 1_000_000_000` range.
100 /// Always in the `0 .. 1_000_000_000` range.
106 ///
101 ///
107 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
102 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
108 pub fn nanoseconds(&self) -> u32 {
103 pub fn nanoseconds(&self) -> u32 {
109 self.nanoseconds
104 self.nanoseconds
110 }
105 }
111
106
112 /// Returns whether two timestamps are equal modulo 2**31 seconds.
107 /// Returns whether two timestamps are equal modulo 2**31 seconds.
113 ///
108 ///
114 /// If this returns `true`, the original values converted from `SystemTime`
109 /// If this returns `true`, the original values converted from `SystemTime`
115 /// or given to `new_truncate` were very likely equal. A false positive is
110 /// or given to `new_truncate` were very likely equal. A false positive is
116 /// possible if they were exactly a multiple of 2**31 seconds apart (around
111 /// possible if they were exactly a multiple of 2**31 seconds apart (around
117 /// 68 years). This is deemed very unlikely to happen by chance, especially
112 /// 68 years). This is deemed very unlikely to happen by chance, especially
118 /// on filesystems that support sub-second precision.
113 /// on filesystems that support sub-second precision.
119 ///
114 ///
120 /// If someone is manipulating the modification times of some files to
115 /// If someone is manipulating the modification times of some files to
121 /// intentionally make `hg status` return incorrect results, not truncating
116 /// intentionally make `hg status` return incorrect results, not truncating
122 /// wouldn’t help much since they can set exactly the expected timestamp.
117 /// wouldn’t help much since they can set exactly the expected timestamp.
123 ///
118 ///
124 /// Sub-second precision is ignored if it is zero in either value.
119 /// Sub-second precision is ignored if it is zero in either value.
125 /// Some APIs simply return zero when more precision is not available.
120 /// Some APIs simply return zero when more precision is not available.
126 /// When comparing values from different sources, if only one is truncated
121 /// When comparing values from different sources, if only one is truncated
127 /// in that way, doing a simple comparison would cause many false
122 /// in that way, doing a simple comparison would cause many false
128 /// negatives.
123 /// negatives.
129 pub fn likely_equal(self, other: Self) -> bool {
124 pub fn likely_equal(self, other: Self) -> bool {
130 self.truncated_seconds == other.truncated_seconds
125 self.truncated_seconds == other.truncated_seconds
131 && (self.nanoseconds == other.nanoseconds
126 && (self.nanoseconds == other.nanoseconds
132 || self.nanoseconds == 0
127 || self.nanoseconds == 0
133 || other.nanoseconds == 0)
128 || other.nanoseconds == 0)
134 }
129 }
135
130
136 pub fn likely_equal_to_mtime_of(
131 pub fn likely_equal_to_mtime_of(
137 self,
132 self,
138 metadata: &fs::Metadata,
133 metadata: &fs::Metadata,
139 ) -> io::Result<bool> {
134 ) -> io::Result<bool> {
140 Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
135 Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
141 }
136 }
142 }
137 }
143
138
144 impl From<SystemTime> for TruncatedTimestamp {
139 impl From<SystemTime> for TruncatedTimestamp {
145 fn from(system_time: SystemTime) -> Self {
140 fn from(system_time: SystemTime) -> Self {
146 // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
141 // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
147 // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
142 // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
148 // We want to effectively access its fields, but the Rust standard
143 // We want to effectively access its fields, but the Rust standard
149 // library does not expose them. The best we can do is:
144 // library does not expose them. The best we can do is:
150 let seconds;
145 let seconds;
151 let nanoseconds;
146 let nanoseconds;
152 match system_time.duration_since(UNIX_EPOCH) {
147 match system_time.duration_since(UNIX_EPOCH) {
153 Ok(duration) => {
148 Ok(duration) => {
154 seconds = duration.as_secs() as i64;
149 seconds = duration.as_secs() as i64;
155 nanoseconds = duration.subsec_nanos();
150 nanoseconds = duration.subsec_nanos();
156 }
151 }
157 Err(error) => {
152 Err(error) => {
158 // `system_time` is before `UNIX_EPOCH`.
153 // `system_time` is before `UNIX_EPOCH`.
159 // We need to undo this algorithm:
154 // We need to undo this algorithm:
160 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
155 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
161 let negative = error.duration();
156 let negative = error.duration();
162 let negative_secs = negative.as_secs() as i64;
157 let negative_secs = negative.as_secs() as i64;
163 let negative_nanos = negative.subsec_nanos();
158 let negative_nanos = negative.subsec_nanos();
164 if negative_nanos == 0 {
159 if negative_nanos == 0 {
165 seconds = -negative_secs;
160 seconds = -negative_secs;
166 nanoseconds = 0;
161 nanoseconds = 0;
167 } else {
162 } else {
168 // For example if `system_time` was 4.3 seconds before
163 // For example if `system_time` was 4.3 seconds before
169 // the Unix epoch we get a Duration that represents
164 // the Unix epoch we get a Duration that represents
170 // `(-4, -0.3)` but we want `(-5, +0.7)`:
165 // `(-4, -0.3)` but we want `(-5, +0.7)`:
171 seconds = -1 - negative_secs;
166 seconds = -1 - negative_secs;
172 nanoseconds = NSEC_PER_SEC - negative_nanos;
167 nanoseconds = NSEC_PER_SEC - negative_nanos;
173 }
168 }
174 }
169 }
175 };
170 };
176 Self::new_truncate(seconds, nanoseconds)
171 Self::new_truncate(seconds, nanoseconds)
177 }
172 }
178 }
173 }
179
174
180 const NSEC_PER_SEC: u32 = 1_000_000_000;
175 const NSEC_PER_SEC: u32 = 1_000_000_000;
181 const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
176 const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
182
177
183 pub const MTIME_UNSET: i32 = -1;
178 pub const MTIME_UNSET: i32 = -1;
184
179
185 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
180 /// A `DirstateEntry` with a size of `-2` means that it was merged from the
186 /// other parent. This allows revert to pick the right status back during a
181 /// other parent. This allows revert to pick the right status back during a
187 /// merge.
182 /// merge.
188 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
183 pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
189 /// A special value used for internal representation of special case in
184 /// A special value used for internal representation of special case in
190 /// dirstate v1 format.
185 /// dirstate v1 format.
191 pub const SIZE_NON_NORMAL: i32 = -1;
186 pub const SIZE_NON_NORMAL: i32 = -1;
192
187
193 impl DirstateEntry {
188 impl DirstateEntry {
194 pub fn from_v2_data(
189 pub fn from_v2_data(
195 wdir_tracked: bool,
190 wdir_tracked: bool,
196 p1_tracked: bool,
191 p1_tracked: bool,
197 p2_info: bool,
192 p2_info: bool,
198 mode_size: Option<(u32, u32)>,
193 mode_size: Option<(u32, u32)>,
199 mtime: Option<TruncatedTimestamp>,
194 mtime: Option<TruncatedTimestamp>,
200 fallback_exec: Option<bool>,
195 fallback_exec: Option<bool>,
201 fallback_symlink: Option<bool>,
196 fallback_symlink: Option<bool>,
202 ) -> Self {
197 ) -> Self {
203 if let Some((mode, size)) = mode_size {
198 if let Some((mode, size)) = mode_size {
204 // TODO: return an error for out of range values?
199 // TODO: return an error for out of range values?
205 assert!(mode & !RANGE_MASK_31BIT == 0);
200 assert!(mode & !RANGE_MASK_31BIT == 0);
206 assert!(size & !RANGE_MASK_31BIT == 0);
201 assert!(size & !RANGE_MASK_31BIT == 0);
207 }
202 }
208 let mut flags = Flags::empty();
203 let mut flags = Flags::empty();
209 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
204 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
210 flags.set(Flags::P1_TRACKED, p1_tracked);
205 flags.set(Flags::P1_TRACKED, p1_tracked);
211 flags.set(Flags::P2_INFO, p2_info);
206 flags.set(Flags::P2_INFO, p2_info);
212 if let Some(exec) = fallback_exec {
207 if let Some(exec) = fallback_exec {
213 flags.insert(Flags::HAS_FALLBACK_EXEC);
208 flags.insert(Flags::HAS_FALLBACK_EXEC);
214 if exec {
209 if exec {
215 flags.insert(Flags::FALLBACK_EXEC);
210 flags.insert(Flags::FALLBACK_EXEC);
216 }
211 }
217 }
212 }
218 if let Some(exec) = fallback_symlink {
213 if let Some(exec) = fallback_symlink {
219 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
214 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
220 if exec {
215 if exec {
221 flags.insert(Flags::FALLBACK_SYMLINK);
216 flags.insert(Flags::FALLBACK_SYMLINK);
222 }
217 }
223 }
218 }
224 Self {
219 Self {
225 flags,
220 flags,
226 mode_size,
221 mode_size,
227 mtime,
222 mtime,
228 }
223 }
229 }
224 }
230
225
231 pub fn from_v1_data(
226 pub fn from_v1_data(
232 state: EntryState,
227 state: EntryState,
233 mode: i32,
228 mode: i32,
234 size: i32,
229 size: i32,
235 mtime: i32,
230 mtime: i32,
236 ) -> Self {
231 ) -> Self {
237 match state {
232 match state {
238 EntryState::Normal => {
233 EntryState::Normal => {
239 if size == SIZE_FROM_OTHER_PARENT {
234 if size == SIZE_FROM_OTHER_PARENT {
240 Self {
235 Self {
241 // might be missing P1_TRACKED
236 // might be missing P1_TRACKED
242 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
237 flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
243 mode_size: None,
238 mode_size: None,
244 mtime: None,
239 mtime: None,
245 }
240 }
246 } else if size == SIZE_NON_NORMAL {
241 } else if size == SIZE_NON_NORMAL {
247 Self {
242 Self {
248 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
243 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
249 mode_size: None,
244 mode_size: None,
250 mtime: None,
245 mtime: None,
251 }
246 }
252 } else if mtime == MTIME_UNSET {
247 } else if mtime == MTIME_UNSET {
253 // TODO: return an error for negative values?
248 // TODO: return an error for negative values?
254 let mode = u32::try_from(mode).unwrap();
249 let mode = u32::try_from(mode).unwrap();
255 let size = u32::try_from(size).unwrap();
250 let size = u32::try_from(size).unwrap();
256 Self {
251 Self {
257 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
252 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
258 mode_size: Some((mode, size)),
253 mode_size: Some((mode, size)),
259 mtime: None,
254 mtime: None,
260 }
255 }
261 } else {
256 } else {
262 // TODO: return an error for negative values?
257 // TODO: return an error for negative values?
263 let mode = u32::try_from(mode).unwrap();
258 let mode = u32::try_from(mode).unwrap();
264 let size = u32::try_from(size).unwrap();
259 let size = u32::try_from(size).unwrap();
265 let mtime = u32::try_from(mtime).unwrap();
260 let mtime = u32::try_from(mtime).unwrap();
266 let mtime =
261 let mtime =
267 TruncatedTimestamp::from_already_truncated(mtime, 0)
262 TruncatedTimestamp::from_already_truncated(mtime, 0)
268 .unwrap();
263 .unwrap();
269 Self {
264 Self {
270 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
265 flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
271 mode_size: Some((mode, size)),
266 mode_size: Some((mode, size)),
272 mtime: Some(mtime),
267 mtime: Some(mtime),
273 }
268 }
274 }
269 }
275 }
270 }
276 EntryState::Added => Self {
271 EntryState::Added => Self {
277 flags: Flags::WDIR_TRACKED,
272 flags: Flags::WDIR_TRACKED,
278 mode_size: None,
273 mode_size: None,
279 mtime: None,
274 mtime: None,
280 },
275 },
281 EntryState::Removed => Self {
276 EntryState::Removed => Self {
282 flags: if size == SIZE_NON_NORMAL {
277 flags: if size == SIZE_NON_NORMAL {
283 Flags::P1_TRACKED | Flags::P2_INFO
278 Flags::P1_TRACKED | Flags::P2_INFO
284 } else if size == SIZE_FROM_OTHER_PARENT {
279 } else if size == SIZE_FROM_OTHER_PARENT {
285 // We don’t know if P1_TRACKED should be set (file history)
280 // We don’t know if P1_TRACKED should be set (file history)
286 Flags::P2_INFO
281 Flags::P2_INFO
287 } else {
282 } else {
288 Flags::P1_TRACKED
283 Flags::P1_TRACKED
289 },
284 },
290 mode_size: None,
285 mode_size: None,
291 mtime: None,
286 mtime: None,
292 },
287 },
293 EntryState::Merged => Self {
288 EntryState::Merged => Self {
294 flags: Flags::WDIR_TRACKED
289 flags: Flags::WDIR_TRACKED
295 | Flags::P1_TRACKED // might not be true because of rename ?
290 | Flags::P1_TRACKED // might not be true because of rename ?
296 | Flags::P2_INFO, // might not be true because of rename ?
291 | Flags::P2_INFO, // might not be true because of rename ?
297 mode_size: None,
292 mode_size: None,
298 mtime: None,
293 mtime: None,
299 },
294 },
300 }
295 }
301 }
296 }
302
297
303 /// Creates a new entry in "removed" state.
298 /// Creates a new entry in "removed" state.
304 ///
299 ///
305 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
300 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
306 /// `SIZE_FROM_OTHER_PARENT`
301 /// `SIZE_FROM_OTHER_PARENT`
307 pub fn new_removed(size: i32) -> Self {
302 pub fn new_removed(size: i32) -> Self {
308 Self::from_v1_data(EntryState::Removed, 0, size, 0)
303 Self::from_v1_data(EntryState::Removed, 0, size, 0)
309 }
304 }
310
305
311 pub fn tracked(&self) -> bool {
306 pub fn tracked(&self) -> bool {
312 self.flags.contains(Flags::WDIR_TRACKED)
307 self.flags.contains(Flags::WDIR_TRACKED)
313 }
308 }
314
309
315 pub fn p1_tracked(&self) -> bool {
310 pub fn p1_tracked(&self) -> bool {
316 self.flags.contains(Flags::P1_TRACKED)
311 self.flags.contains(Flags::P1_TRACKED)
317 }
312 }
318
313
319 fn in_either_parent(&self) -> bool {
314 fn in_either_parent(&self) -> bool {
320 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
315 self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
321 }
316 }
322
317
323 pub fn removed(&self) -> bool {
318 pub fn removed(&self) -> bool {
324 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
319 self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
325 }
320 }
326
321
327 pub fn p2_info(&self) -> bool {
322 pub fn p2_info(&self) -> bool {
328 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
323 self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
329 }
324 }
330
325
331 pub fn added(&self) -> bool {
326 pub fn added(&self) -> bool {
332 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
327 self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
333 }
328 }
334
329
335 pub fn maybe_clean(&self) -> bool {
330 pub fn maybe_clean(&self) -> bool {
336 if !self.flags.contains(Flags::WDIR_TRACKED) {
331 if !self.flags.contains(Flags::WDIR_TRACKED) {
337 false
332 false
338 } else if !self.flags.contains(Flags::P1_TRACKED) {
333 } else if !self.flags.contains(Flags::P1_TRACKED) {
339 false
334 false
340 } else if self.flags.contains(Flags::P2_INFO) {
335 } else if self.flags.contains(Flags::P2_INFO) {
341 false
336 false
342 } else {
337 } else {
343 true
338 true
344 }
339 }
345 }
340 }
346
341
347 pub fn any_tracked(&self) -> bool {
342 pub fn any_tracked(&self) -> bool {
348 self.flags.intersects(
343 self.flags.intersects(
349 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
344 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
350 )
345 )
351 }
346 }
352
347
353 /// Returns `(wdir_tracked, p1_tracked, p2_info, mode_size, mtime)`
348 /// Returns `(wdir_tracked, p1_tracked, p2_info, mode_size, mtime)`
354 pub(crate) fn v2_data(
349 pub(crate) fn v2_data(
355 &self,
350 &self,
356 ) -> (
351 ) -> (
357 bool,
352 bool,
358 bool,
353 bool,
359 bool,
354 bool,
360 Option<(u32, u32)>,
355 Option<(u32, u32)>,
361 Option<TruncatedTimestamp>,
356 Option<TruncatedTimestamp>,
362 Option<bool>,
357 Option<bool>,
363 Option<bool>,
358 Option<bool>,
364 ) {
359 ) {
365 if !self.any_tracked() {
360 if !self.any_tracked() {
366 // TODO: return an Option instead?
361 // TODO: return an Option instead?
367 panic!("Accessing v1_state of an untracked DirstateEntry")
362 panic!("Accessing v1_state of an untracked DirstateEntry")
368 }
363 }
369 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
364 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
370 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
365 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
371 let p2_info = self.flags.contains(Flags::P2_INFO);
366 let p2_info = self.flags.contains(Flags::P2_INFO);
372 let mode_size = self.mode_size;
367 let mode_size = self.mode_size;
373 let mtime = self.mtime;
368 let mtime = self.mtime;
374 (
369 (
375 wdir_tracked,
370 wdir_tracked,
376 p1_tracked,
371 p1_tracked,
377 p2_info,
372 p2_info,
378 mode_size,
373 mode_size,
379 mtime,
374 mtime,
380 self.get_fallback_exec(),
375 self.get_fallback_exec(),
381 self.get_fallback_symlink(),
376 self.get_fallback_symlink(),
382 )
377 )
383 }
378 }
384
379
385 fn v1_state(&self) -> EntryState {
380 fn v1_state(&self) -> EntryState {
386 if !self.any_tracked() {
381 if !self.any_tracked() {
387 // TODO: return an Option instead?
382 // TODO: return an Option instead?
388 panic!("Accessing v1_state of an untracked DirstateEntry")
383 panic!("Accessing v1_state of an untracked DirstateEntry")
389 }
384 }
390 if self.removed() {
385 if self.removed() {
391 EntryState::Removed
386 EntryState::Removed
392 } else if self
387 } else if self
393 .flags
388 .flags
394 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
389 .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
395 {
390 {
396 EntryState::Merged
391 EntryState::Merged
397 } else if self.added() {
392 } else if self.added() {
398 EntryState::Added
393 EntryState::Added
399 } else {
394 } else {
400 EntryState::Normal
395 EntryState::Normal
401 }
396 }
402 }
397 }
403
398
404 fn v1_mode(&self) -> i32 {
399 fn v1_mode(&self) -> i32 {
405 if let Some((mode, _size)) = self.mode_size {
400 if let Some((mode, _size)) = self.mode_size {
406 i32::try_from(mode).unwrap()
401 i32::try_from(mode).unwrap()
407 } else {
402 } else {
408 0
403 0
409 }
404 }
410 }
405 }
411
406
412 fn v1_size(&self) -> i32 {
407 fn v1_size(&self) -> i32 {
413 if !self.any_tracked() {
408 if !self.any_tracked() {
414 // TODO: return an Option instead?
409 // TODO: return an Option instead?
415 panic!("Accessing v1_size of an untracked DirstateEntry")
410 panic!("Accessing v1_size of an untracked DirstateEntry")
416 }
411 }
417 if self.removed()
412 if self.removed()
418 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
413 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
419 {
414 {
420 SIZE_NON_NORMAL
415 SIZE_NON_NORMAL
421 } else if self.flags.contains(Flags::P2_INFO) {
416 } else if self.flags.contains(Flags::P2_INFO) {
422 SIZE_FROM_OTHER_PARENT
417 SIZE_FROM_OTHER_PARENT
423 } else if self.removed() {
418 } else if self.removed() {
424 0
419 0
425 } else if self.added() {
420 } else if self.added() {
426 SIZE_NON_NORMAL
421 SIZE_NON_NORMAL
427 } else if let Some((_mode, size)) = self.mode_size {
422 } else if let Some((_mode, size)) = self.mode_size {
428 i32::try_from(size).unwrap()
423 i32::try_from(size).unwrap()
429 } else {
424 } else {
430 SIZE_NON_NORMAL
425 SIZE_NON_NORMAL
431 }
426 }
432 }
427 }
433
428
434 fn v1_mtime(&self) -> i32 {
429 fn v1_mtime(&self) -> i32 {
435 if !self.any_tracked() {
430 if !self.any_tracked() {
436 // TODO: return an Option instead?
431 // TODO: return an Option instead?
437 panic!("Accessing v1_mtime of an untracked DirstateEntry")
432 panic!("Accessing v1_mtime of an untracked DirstateEntry")
438 }
433 }
439 if self.removed() {
434 if self.removed() {
440 0
435 0
441 } else if self.flags.contains(Flags::P2_INFO) {
436 } else if self.flags.contains(Flags::P2_INFO) {
442 MTIME_UNSET
437 MTIME_UNSET
443 } else if !self.flags.contains(Flags::P1_TRACKED) {
438 } else if !self.flags.contains(Flags::P1_TRACKED) {
444 MTIME_UNSET
439 MTIME_UNSET
445 } else if let Some(mtime) = self.mtime {
440 } else if let Some(mtime) = self.mtime {
446 i32::try_from(mtime.truncated_seconds()).unwrap()
441 i32::try_from(mtime.truncated_seconds()).unwrap()
447 } else {
442 } else {
448 MTIME_UNSET
443 MTIME_UNSET
449 }
444 }
450 }
445 }
451
446
452 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
447 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
453 pub fn state(&self) -> EntryState {
448 pub fn state(&self) -> EntryState {
454 self.v1_state()
449 self.v1_state()
455 }
450 }
456
451
457 // TODO: return Option?
452 // TODO: return Option?
458 pub fn mode(&self) -> i32 {
453 pub fn mode(&self) -> i32 {
459 self.v1_mode()
454 self.v1_mode()
460 }
455 }
461
456
462 // TODO: return Option?
457 // TODO: return Option?
463 pub fn size(&self) -> i32 {
458 pub fn size(&self) -> i32 {
464 self.v1_size()
459 self.v1_size()
465 }
460 }
466
461
467 // TODO: return Option?
462 // TODO: return Option?
468 pub fn mtime(&self) -> i32 {
463 pub fn mtime(&self) -> i32 {
469 self.v1_mtime()
464 self.v1_mtime()
470 }
465 }
471
466
472 pub fn get_fallback_exec(&self) -> Option<bool> {
467 pub fn get_fallback_exec(&self) -> Option<bool> {
473 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
468 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
474 Some(self.flags.contains(Flags::FALLBACK_EXEC))
469 Some(self.flags.contains(Flags::FALLBACK_EXEC))
475 } else {
470 } else {
476 None
471 None
477 }
472 }
478 }
473 }
479
474
480 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
475 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
481 match value {
476 match value {
482 None => {
477 None => {
483 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
478 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
484 self.flags.remove(Flags::FALLBACK_EXEC);
479 self.flags.remove(Flags::FALLBACK_EXEC);
485 }
480 }
486 Some(exec) => {
481 Some(exec) => {
487 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
482 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
488 if exec {
483 if exec {
489 self.flags.insert(Flags::FALLBACK_EXEC);
484 self.flags.insert(Flags::FALLBACK_EXEC);
490 }
485 }
491 }
486 }
492 }
487 }
493 }
488 }
494
489
495 pub fn get_fallback_symlink(&self) -> Option<bool> {
490 pub fn get_fallback_symlink(&self) -> Option<bool> {
496 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
491 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
497 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
492 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
498 } else {
493 } else {
499 None
494 None
500 }
495 }
501 }
496 }
502
497
503 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
498 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
504 match value {
499 match value {
505 None => {
500 None => {
506 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
501 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
507 self.flags.remove(Flags::FALLBACK_SYMLINK);
502 self.flags.remove(Flags::FALLBACK_SYMLINK);
508 }
503 }
509 Some(symlink) => {
504 Some(symlink) => {
510 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
505 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
511 if symlink {
506 if symlink {
512 self.flags.insert(Flags::FALLBACK_SYMLINK);
507 self.flags.insert(Flags::FALLBACK_SYMLINK);
513 }
508 }
514 }
509 }
515 }
510 }
516 }
511 }
517
512
518 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
513 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
519 self.mtime
514 self.mtime
520 }
515 }
521
516
522 pub fn drop_merge_data(&mut self) {
517 pub fn drop_merge_data(&mut self) {
523 if self.flags.contains(Flags::P2_INFO) {
518 if self.flags.contains(Flags::P2_INFO) {
524 self.flags.remove(Flags::P2_INFO);
519 self.flags.remove(Flags::P2_INFO);
525 self.mode_size = None;
520 self.mode_size = None;
526 self.mtime = None;
521 self.mtime = None;
527 }
522 }
528 }
523 }
529
524
530 pub fn set_possibly_dirty(&mut self) {
525 pub fn set_possibly_dirty(&mut self) {
531 self.mtime = None
526 self.mtime = None
532 }
527 }
533
528
534 pub fn set_clean(
529 pub fn set_clean(
535 &mut self,
530 &mut self,
536 mode: u32,
531 mode: u32,
537 size: u32,
532 size: u32,
538 mtime: TruncatedTimestamp,
533 mtime: TruncatedTimestamp,
539 ) {
534 ) {
540 let size = size & RANGE_MASK_31BIT;
535 let size = size & RANGE_MASK_31BIT;
541 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
536 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
542 self.mode_size = Some((mode, size));
537 self.mode_size = Some((mode, size));
543 self.mtime = Some(mtime);
538 self.mtime = Some(mtime);
544 }
539 }
545
540
546 pub fn set_tracked(&mut self) {
541 pub fn set_tracked(&mut self) {
547 self.flags.insert(Flags::WDIR_TRACKED);
542 self.flags.insert(Flags::WDIR_TRACKED);
548 // `set_tracked` is replacing various `normallookup` call. So we mark
543 // `set_tracked` is replacing various `normallookup` call. So we mark
549 // the files as needing lookup
544 // the files as needing lookup
550 //
545 //
551 // Consider dropping this in the future in favor of something less
546 // Consider dropping this in the future in favor of something less
552 // broad.
547 // broad.
553 self.mtime = None;
548 self.mtime = None;
554 }
549 }
555
550
556 pub fn set_untracked(&mut self) {
551 pub fn set_untracked(&mut self) {
557 self.flags.remove(Flags::WDIR_TRACKED);
552 self.flags.remove(Flags::WDIR_TRACKED);
558 self.mode_size = None;
553 self.mode_size = None;
559 self.mtime = None;
554 self.mtime = None;
560 }
555 }
561
556
562 /// Returns `(state, mode, size, mtime)` for the puprose of serialization
557 /// Returns `(state, mode, size, mtime)` for the puprose of serialization
563 /// in the dirstate-v1 format.
558 /// in the dirstate-v1 format.
564 ///
559 ///
565 /// This includes marker values such as `mtime == -1`. In the future we may
560 /// This includes marker values such as `mtime == -1`. In the future we may
566 /// want to not represent these cases that way in memory, but serialization
561 /// want to not represent these cases that way in memory, but serialization
567 /// will need to keep the same format.
562 /// will need to keep the same format.
568 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
563 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
569 (
564 (
570 self.v1_state().into(),
565 self.v1_state().into(),
571 self.v1_mode(),
566 self.v1_mode(),
572 self.v1_size(),
567 self.v1_size(),
573 self.v1_mtime(),
568 self.v1_mtime(),
574 )
569 )
575 }
570 }
576
571
577 pub(crate) fn is_from_other_parent(&self) -> bool {
572 pub(crate) fn is_from_other_parent(&self) -> bool {
578 self.state() == EntryState::Normal
573 self.state() == EntryState::Normal
579 && self.size() == SIZE_FROM_OTHER_PARENT
574 && self.size() == SIZE_FROM_OTHER_PARENT
580 }
575 }
581
576
582 // TODO: other platforms
577 // TODO: other platforms
583 #[cfg(unix)]
578 #[cfg(unix)]
584 pub fn mode_changed(
579 pub fn mode_changed(
585 &self,
580 &self,
586 filesystem_metadata: &std::fs::Metadata,
581 filesystem_metadata: &std::fs::Metadata,
587 ) -> bool {
582 ) -> bool {
588 use std::os::unix::fs::MetadataExt;
583 use std::os::unix::fs::MetadataExt;
589 const EXEC_BIT_MASK: u32 = 0o100;
584 const EXEC_BIT_MASK: u32 = 0o100;
590 let dirstate_exec_bit = (self.mode() as u32) & EXEC_BIT_MASK;
585 let dirstate_exec_bit = (self.mode() as u32) & EXEC_BIT_MASK;
591 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
586 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
592 dirstate_exec_bit != fs_exec_bit
587 dirstate_exec_bit != fs_exec_bit
593 }
588 }
594
589
595 /// Returns a `(state, mode, size, mtime)` tuple as for
590 /// Returns a `(state, mode, size, mtime)` tuple as for
596 /// `DirstateMapMethods::debug_iter`.
591 /// `DirstateMapMethods::debug_iter`.
597 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
592 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
598 (self.state().into(), self.mode(), self.size(), self.mtime())
593 (self.state().into(), self.mode(), self.size(), self.mtime())
599 }
594 }
600
595
601 /// True if the stored mtime would be ambiguous with the current time
596 /// True if the stored mtime would be ambiguous with the current time
602 pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
597 pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
603 if let Some(mtime) = self.mtime {
598 if let Some(mtime) = self.mtime {
604 self.state() == EntryState::Normal
599 self.state() == EntryState::Normal
605 && mtime.truncated_seconds() == now.truncated_seconds()
600 && mtime.truncated_seconds() == now.truncated_seconds()
606 } else {
601 } else {
607 false
602 false
608 }
603 }
609 }
604 }
610 }
605 }
611
606
612 impl EntryState {
607 impl EntryState {
613 pub fn is_tracked(self) -> bool {
608 pub fn is_tracked(self) -> bool {
614 use EntryState::*;
609 use EntryState::*;
615 match self {
610 match self {
616 Normal | Added | Merged => true,
611 Normal | Added | Merged => true,
617 Removed => false,
612 Removed => false,
618 }
613 }
619 }
614 }
620 }
615 }
621
616
622 impl TryFrom<u8> for EntryState {
617 impl TryFrom<u8> for EntryState {
623 type Error = HgError;
618 type Error = HgError;
624
619
625 fn try_from(value: u8) -> Result<Self, Self::Error> {
620 fn try_from(value: u8) -> Result<Self, Self::Error> {
626 match value {
621 match value {
627 b'n' => Ok(EntryState::Normal),
622 b'n' => Ok(EntryState::Normal),
628 b'a' => Ok(EntryState::Added),
623 b'a' => Ok(EntryState::Added),
629 b'r' => Ok(EntryState::Removed),
624 b'r' => Ok(EntryState::Removed),
630 b'm' => Ok(EntryState::Merged),
625 b'm' => Ok(EntryState::Merged),
631 _ => Err(HgError::CorruptedRepository(format!(
626 _ => Err(HgError::CorruptedRepository(format!(
632 "Incorrect dirstate entry state {}",
627 "Incorrect dirstate entry state {}",
633 value
628 value
634 ))),
629 ))),
635 }
630 }
636 }
631 }
637 }
632 }
638
633
639 impl Into<u8> for EntryState {
634 impl Into<u8> for EntryState {
640 fn into(self) -> u8 {
635 fn into(self) -> u8 {
641 match self {
636 match self {
642 EntryState::Normal => b'n',
637 EntryState::Normal => b'n',
643 EntryState::Added => b'a',
638 EntryState::Added => b'a',
644 EntryState::Removed => b'r',
639 EntryState::Removed => b'r',
645 EntryState::Merged => b'm',
640 EntryState::Merged => b'm',
646 }
641 }
647 }
642 }
648 }
643 }
@@ -1,782 +1,774
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::TruncatedTimestamp;
5 use crate::dirstate::TruncatedTimestamp;
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 use crate::dirstate_tree::path_with_basename::WithBasename;
7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::utils::hg_path::HgPath;
9 use crate::utils::hg_path::HgPath;
10 use crate::DirstateEntry;
10 use crate::DirstateEntry;
11 use crate::DirstateError;
11 use crate::DirstateError;
12 use crate::DirstateParents;
12 use crate::DirstateParents;
13 use bitflags::bitflags;
13 use bitflags::bitflags;
14 use bytes_cast::unaligned::{U16Be, U32Be};
14 use bytes_cast::unaligned::{U16Be, U32Be};
15 use bytes_cast::BytesCast;
15 use bytes_cast::BytesCast;
16 use format_bytes::format_bytes;
16 use format_bytes::format_bytes;
17 use std::borrow::Cow;
17 use std::borrow::Cow;
18 use std::convert::{TryFrom, TryInto};
18 use std::convert::{TryFrom, TryInto};
19
19
20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 /// This a redundant sanity check more than an actual "magic number" since
21 /// This a redundant sanity check more than an actual "magic number" since
22 /// `.hg/requires` already governs which format should be used.
22 /// `.hg/requires` already governs which format should be used.
23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24
24
25 /// Keep space for 256-bit hashes
25 /// Keep space for 256-bit hashes
26 const STORED_NODE_ID_BYTES: usize = 32;
26 const STORED_NODE_ID_BYTES: usize = 32;
27
27
28 /// … even though only 160 bits are used for now, with SHA-1
28 /// … even though only 160 bits are used for now, with SHA-1
29 const USED_NODE_ID_BYTES: usize = 20;
29 const USED_NODE_ID_BYTES: usize = 20;
30
30
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33
33
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 const TREE_METADATA_SIZE: usize = 44;
35 const TREE_METADATA_SIZE: usize = 44;
36 const NODE_SIZE: usize = 44;
36 const NODE_SIZE: usize = 44;
37
37
38 /// Make sure that size-affecting changes are made knowingly
38 /// Make sure that size-affecting changes are made knowingly
39 #[allow(unused)]
39 #[allow(unused)]
40 fn static_assert_size_of() {
40 fn static_assert_size_of() {
41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 }
44 }
45
45
46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 #[derive(BytesCast)]
47 #[derive(BytesCast)]
48 #[repr(C)]
48 #[repr(C)]
49 struct DocketHeader {
49 struct DocketHeader {
50 marker: [u8; V2_FORMAT_MARKER.len()],
50 marker: [u8; V2_FORMAT_MARKER.len()],
51 parent_1: [u8; STORED_NODE_ID_BYTES],
51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 parent_2: [u8; STORED_NODE_ID_BYTES],
52 parent_2: [u8; STORED_NODE_ID_BYTES],
53
53
54 metadata: TreeMetadata,
54 metadata: TreeMetadata,
55
55
56 /// Counted in bytes
56 /// Counted in bytes
57 data_size: Size,
57 data_size: Size,
58
58
59 uuid_size: u8,
59 uuid_size: u8,
60 }
60 }
61
61
62 pub struct Docket<'on_disk> {
62 pub struct Docket<'on_disk> {
63 header: &'on_disk DocketHeader,
63 header: &'on_disk DocketHeader,
64 uuid: &'on_disk [u8],
64 uuid: &'on_disk [u8],
65 }
65 }
66
66
67 /// Fields are documented in the *Tree metadata in the docket file*
67 /// Fields are documented in the *Tree metadata in the docket file*
68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 #[derive(BytesCast)]
69 #[derive(BytesCast)]
70 #[repr(C)]
70 #[repr(C)]
71 struct TreeMetadata {
71 struct TreeMetadata {
72 root_nodes: ChildNodes,
72 root_nodes: ChildNodes,
73 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
74 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
75 unreachable_bytes: Size,
75 unreachable_bytes: Size,
76 unused: [u8; 4],
76 unused: [u8; 4],
77
77
78 /// See *Optional hash of ignore patterns* section of
78 /// See *Optional hash of ignore patterns* section of
79 /// `mercurial/helptext/internals/dirstate-v2.txt`
79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 ignore_patterns_hash: IgnorePatternsHash,
80 ignore_patterns_hash: IgnorePatternsHash,
81 }
81 }
82
82
83 /// Fields are documented in the *The data file format*
83 /// Fields are documented in the *The data file format*
84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 #[derive(BytesCast)]
85 #[derive(BytesCast)]
86 #[repr(C)]
86 #[repr(C)]
87 pub(super) struct Node {
87 pub(super) struct Node {
88 full_path: PathSlice,
88 full_path: PathSlice,
89
89
90 /// In bytes from `self.full_path.start`
90 /// In bytes from `self.full_path.start`
91 base_name_start: PathSize,
91 base_name_start: PathSize,
92
92
93 copy_source: OptPathSlice,
93 copy_source: OptPathSlice,
94 children: ChildNodes,
94 children: ChildNodes,
95 pub(super) descendants_with_entry_count: Size,
95 pub(super) descendants_with_entry_count: Size,
96 pub(super) tracked_descendants_count: Size,
96 pub(super) tracked_descendants_count: Size,
97 flags: U16Be,
97 flags: U16Be,
98 size: U32Be,
98 size: U32Be,
99 mtime: PackedTruncatedTimestamp,
99 mtime: PackedTruncatedTimestamp,
100 }
100 }
101
101
102 bitflags! {
102 bitflags! {
103 #[repr(C)]
103 #[repr(C)]
104 struct Flags: u16 {
104 struct Flags: u16 {
105 const WDIR_TRACKED = 1 << 0;
105 const WDIR_TRACKED = 1 << 0;
106 const P1_TRACKED = 1 << 1;
106 const P1_TRACKED = 1 << 1;
107 const P2_INFO = 1 << 2;
107 const P2_INFO = 1 << 2;
108 const HAS_MODE_AND_SIZE = 1 << 3;
108 const HAS_MODE_AND_SIZE = 1 << 3;
109 const HAS_FILE_MTIME = 1 << 4;
109 const HAS_FILE_MTIME = 1 << 4;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
110 const HAS_DIRECTORY_MTIME = 1 << 5;
111 const MODE_EXEC_PERM = 1 << 6;
111 const MODE_EXEC_PERM = 1 << 6;
112 const MODE_IS_SYMLINK = 1 << 7;
112 const MODE_IS_SYMLINK = 1 << 7;
113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
114 const ALL_UNKNOWN_RECORDED = 1 << 9;
114 const ALL_UNKNOWN_RECORDED = 1 << 9;
115 const ALL_IGNORED_RECORDED = 1 << 10;
115 const ALL_IGNORED_RECORDED = 1 << 10;
116 const HAS_FALLBACK_EXEC = 1 << 11;
116 const HAS_FALLBACK_EXEC = 1 << 11;
117 const FALLBACK_EXEC = 1 << 12;
117 const FALLBACK_EXEC = 1 << 12;
118 const HAS_FALLBACK_SYMLINK = 1 << 13;
118 const HAS_FALLBACK_SYMLINK = 1 << 13;
119 const FALLBACK_SYMLINK = 1 << 14;
119 const FALLBACK_SYMLINK = 1 << 14;
120 const MTIME_SECOND_AMBIGUOUS = 1 << 15;
120 const MTIME_SECOND_AMBIGUOUS = 1 << 15;
121 }
121 }
122 }
122 }
123
123
124 /// Duration since the Unix epoch
124 /// Duration since the Unix epoch
125 #[derive(BytesCast, Copy, Clone)]
125 #[derive(BytesCast, Copy, Clone)]
126 #[repr(C)]
126 #[repr(C)]
127 struct PackedTruncatedTimestamp {
127 struct PackedTruncatedTimestamp {
128 truncated_seconds: U32Be,
128 truncated_seconds: U32Be,
129 nanoseconds: U32Be,
129 nanoseconds: U32Be,
130 }
130 }
131
131
132 /// Counted in bytes from the start of the file
132 /// Counted in bytes from the start of the file
133 ///
133 ///
134 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
134 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
135 type Offset = U32Be;
135 type Offset = U32Be;
136
136
137 /// Counted in number of items
137 /// Counted in number of items
138 ///
138 ///
139 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
139 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
140 type Size = U32Be;
140 type Size = U32Be;
141
141
142 /// Counted in bytes
142 /// Counted in bytes
143 ///
143 ///
144 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
144 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
145 type PathSize = U16Be;
145 type PathSize = U16Be;
146
146
147 /// A contiguous sequence of `len` times `Node`, representing the child nodes
147 /// A contiguous sequence of `len` times `Node`, representing the child nodes
148 /// of either some other node or of the repository root.
148 /// of either some other node or of the repository root.
149 ///
149 ///
150 /// Always sorted by ascending `full_path`, to allow binary search.
150 /// Always sorted by ascending `full_path`, to allow binary search.
151 /// Since nodes with the same parent nodes also have the same parent path,
151 /// Since nodes with the same parent nodes also have the same parent path,
152 /// only the `base_name`s need to be compared during binary search.
152 /// only the `base_name`s need to be compared during binary search.
153 #[derive(BytesCast, Copy, Clone)]
153 #[derive(BytesCast, Copy, Clone)]
154 #[repr(C)]
154 #[repr(C)]
155 struct ChildNodes {
155 struct ChildNodes {
156 start: Offset,
156 start: Offset,
157 len: Size,
157 len: Size,
158 }
158 }
159
159
160 /// A `HgPath` of `len` bytes
160 /// A `HgPath` of `len` bytes
161 #[derive(BytesCast, Copy, Clone)]
161 #[derive(BytesCast, Copy, Clone)]
162 #[repr(C)]
162 #[repr(C)]
163 struct PathSlice {
163 struct PathSlice {
164 start: Offset,
164 start: Offset,
165 len: PathSize,
165 len: PathSize,
166 }
166 }
167
167
168 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
168 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
169 type OptPathSlice = PathSlice;
169 type OptPathSlice = PathSlice;
170
170
171 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
171 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
172 ///
172 ///
173 /// This should only happen if Mercurial is buggy or a repository is corrupted.
173 /// This should only happen if Mercurial is buggy or a repository is corrupted.
174 #[derive(Debug)]
174 #[derive(Debug)]
175 pub struct DirstateV2ParseError;
175 pub struct DirstateV2ParseError;
176
176
177 impl From<DirstateV2ParseError> for HgError {
177 impl From<DirstateV2ParseError> for HgError {
178 fn from(_: DirstateV2ParseError) -> Self {
178 fn from(_: DirstateV2ParseError) -> Self {
179 HgError::corrupted("dirstate-v2 parse error")
179 HgError::corrupted("dirstate-v2 parse error")
180 }
180 }
181 }
181 }
182
182
183 impl From<DirstateV2ParseError> for crate::DirstateError {
183 impl From<DirstateV2ParseError> for crate::DirstateError {
184 fn from(error: DirstateV2ParseError) -> Self {
184 fn from(error: DirstateV2ParseError) -> Self {
185 HgError::from(error).into()
185 HgError::from(error).into()
186 }
186 }
187 }
187 }
188
188
189 impl<'on_disk> Docket<'on_disk> {
189 impl<'on_disk> Docket<'on_disk> {
190 pub fn parents(&self) -> DirstateParents {
190 pub fn parents(&self) -> DirstateParents {
191 use crate::Node;
191 use crate::Node;
192 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
192 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
193 .unwrap()
193 .unwrap()
194 .clone();
194 .clone();
195 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
195 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
196 .unwrap()
196 .unwrap()
197 .clone();
197 .clone();
198 DirstateParents { p1, p2 }
198 DirstateParents { p1, p2 }
199 }
199 }
200
200
201 pub fn tree_metadata(&self) -> &[u8] {
201 pub fn tree_metadata(&self) -> &[u8] {
202 self.header.metadata.as_bytes()
202 self.header.metadata.as_bytes()
203 }
203 }
204
204
205 pub fn data_size(&self) -> usize {
205 pub fn data_size(&self) -> usize {
206 // This `unwrap` could only panic on a 16-bit CPU
206 // This `unwrap` could only panic on a 16-bit CPU
207 self.header.data_size.get().try_into().unwrap()
207 self.header.data_size.get().try_into().unwrap()
208 }
208 }
209
209
210 pub fn data_filename(&self) -> String {
210 pub fn data_filename(&self) -> String {
211 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
211 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
212 }
212 }
213 }
213 }
214
214
215 pub fn read_docket(
215 pub fn read_docket(
216 on_disk: &[u8],
216 on_disk: &[u8],
217 ) -> Result<Docket<'_>, DirstateV2ParseError> {
217 ) -> Result<Docket<'_>, DirstateV2ParseError> {
218 let (header, uuid) =
218 let (header, uuid) =
219 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
219 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
220 let uuid_size = header.uuid_size as usize;
220 let uuid_size = header.uuid_size as usize;
221 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
221 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
222 Ok(Docket { header, uuid })
222 Ok(Docket { header, uuid })
223 } else {
223 } else {
224 Err(DirstateV2ParseError)
224 Err(DirstateV2ParseError)
225 }
225 }
226 }
226 }
227
227
228 pub(super) fn read<'on_disk>(
228 pub(super) fn read<'on_disk>(
229 on_disk: &'on_disk [u8],
229 on_disk: &'on_disk [u8],
230 metadata: &[u8],
230 metadata: &[u8],
231 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
231 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
232 if on_disk.is_empty() {
232 if on_disk.is_empty() {
233 return Ok(DirstateMap::empty(on_disk));
233 return Ok(DirstateMap::empty(on_disk));
234 }
234 }
235 let (meta, _) = TreeMetadata::from_bytes(metadata)
235 let (meta, _) = TreeMetadata::from_bytes(metadata)
236 .map_err(|_| DirstateV2ParseError)?;
236 .map_err(|_| DirstateV2ParseError)?;
237 let dirstate_map = DirstateMap {
237 let dirstate_map = DirstateMap {
238 on_disk,
238 on_disk,
239 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
239 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
240 on_disk,
240 on_disk,
241 meta.root_nodes,
241 meta.root_nodes,
242 )?),
242 )?),
243 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
243 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
244 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
244 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
245 ignore_patterns_hash: meta.ignore_patterns_hash,
245 ignore_patterns_hash: meta.ignore_patterns_hash,
246 unreachable_bytes: meta.unreachable_bytes.get(),
246 unreachable_bytes: meta.unreachable_bytes.get(),
247 };
247 };
248 Ok(dirstate_map)
248 Ok(dirstate_map)
249 }
249 }
250
250
251 impl Node {
251 impl Node {
252 pub(super) fn full_path<'on_disk>(
252 pub(super) fn full_path<'on_disk>(
253 &self,
253 &self,
254 on_disk: &'on_disk [u8],
254 on_disk: &'on_disk [u8],
255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
256 read_hg_path(on_disk, self.full_path)
256 read_hg_path(on_disk, self.full_path)
257 }
257 }
258
258
259 pub(super) fn base_name_start<'on_disk>(
259 pub(super) fn base_name_start<'on_disk>(
260 &self,
260 &self,
261 ) -> Result<usize, DirstateV2ParseError> {
261 ) -> Result<usize, DirstateV2ParseError> {
262 let start = self.base_name_start.get();
262 let start = self.base_name_start.get();
263 if start < self.full_path.len.get() {
263 if start < self.full_path.len.get() {
264 let start = usize::try_from(start)
264 let start = usize::try_from(start)
265 // u32 -> usize, could only panic on a 16-bit CPU
265 // u32 -> usize, could only panic on a 16-bit CPU
266 .expect("dirstate-v2 base_name_start out of bounds");
266 .expect("dirstate-v2 base_name_start out of bounds");
267 Ok(start)
267 Ok(start)
268 } else {
268 } else {
269 Err(DirstateV2ParseError)
269 Err(DirstateV2ParseError)
270 }
270 }
271 }
271 }
272
272
273 pub(super) fn base_name<'on_disk>(
273 pub(super) fn base_name<'on_disk>(
274 &self,
274 &self,
275 on_disk: &'on_disk [u8],
275 on_disk: &'on_disk [u8],
276 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
276 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
277 let full_path = self.full_path(on_disk)?;
277 let full_path = self.full_path(on_disk)?;
278 let base_name_start = self.base_name_start()?;
278 let base_name_start = self.base_name_start()?;
279 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
279 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
280 }
280 }
281
281
282 pub(super) fn path<'on_disk>(
282 pub(super) fn path<'on_disk>(
283 &self,
283 &self,
284 on_disk: &'on_disk [u8],
284 on_disk: &'on_disk [u8],
285 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
285 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
286 Ok(WithBasename::from_raw_parts(
286 Ok(WithBasename::from_raw_parts(
287 Cow::Borrowed(self.full_path(on_disk)?),
287 Cow::Borrowed(self.full_path(on_disk)?),
288 self.base_name_start()?,
288 self.base_name_start()?,
289 ))
289 ))
290 }
290 }
291
291
292 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
292 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
293 self.copy_source.start.get() != 0
293 self.copy_source.start.get() != 0
294 }
294 }
295
295
296 pub(super) fn copy_source<'on_disk>(
296 pub(super) fn copy_source<'on_disk>(
297 &self,
297 &self,
298 on_disk: &'on_disk [u8],
298 on_disk: &'on_disk [u8],
299 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
299 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
300 Ok(if self.has_copy_source() {
300 Ok(if self.has_copy_source() {
301 Some(read_hg_path(on_disk, self.copy_source)?)
301 Some(read_hg_path(on_disk, self.copy_source)?)
302 } else {
302 } else {
303 None
303 None
304 })
304 })
305 }
305 }
306
306
307 fn flags(&self) -> Flags {
307 fn flags(&self) -> Flags {
308 Flags::from_bits_truncate(self.flags.get())
308 Flags::from_bits_truncate(self.flags.get())
309 }
309 }
310
310
311 fn has_entry(&self) -> bool {
311 fn has_entry(&self) -> bool {
312 self.flags().intersects(
312 self.flags().intersects(
313 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
313 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
314 )
314 )
315 }
315 }
316
316
317 pub(super) fn node_data(
317 pub(super) fn node_data(
318 &self,
318 &self,
319 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
319 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
320 if self.has_entry() {
320 if self.has_entry() {
321 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
321 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
322 } else if let Some(mtime) = self.cached_directory_mtime()? {
322 } else if let Some(mtime) = self.cached_directory_mtime()? {
323 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
323 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
324 } else {
324 } else {
325 Ok(dirstate_map::NodeData::None)
325 Ok(dirstate_map::NodeData::None)
326 }
326 }
327 }
327 }
328
328
329 pub(super) fn cached_directory_mtime(
329 pub(super) fn cached_directory_mtime(
330 &self,
330 &self,
331 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
331 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
332 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
332 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
333 // ignore the mtime if the flag is set.
333 // ignore the mtime if the flag is set.
334 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
334 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
335 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
335 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
336 {
336 {
337 if self.flags().contains(Flags::HAS_FILE_MTIME) {
337 if self.flags().contains(Flags::HAS_FILE_MTIME) {
338 Err(DirstateV2ParseError)
338 Err(DirstateV2ParseError)
339 } else {
339 } else {
340 Ok(Some(self.mtime.try_into()?))
340 Ok(Some(self.mtime.try_into()?))
341 }
341 }
342 } else {
342 } else {
343 Ok(None)
343 Ok(None)
344 }
344 }
345 }
345 }
346
346
347 fn synthesize_unix_mode(&self) -> u32 {
347 fn synthesize_unix_mode(&self) -> u32 {
348 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
348 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
349 libc::S_IFLNK
349 libc::S_IFLNK
350 } else {
350 } else {
351 libc::S_IFREG
351 libc::S_IFREG
352 };
352 };
353 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
353 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
354 0o755
354 0o755
355 } else {
355 } else {
356 0o644
356 0o644
357 };
357 };
358 file_type | permisions
358 file_type | permisions
359 }
359 }
360
360
361 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
361 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
362 // TODO: convert through raw bits instead?
362 // TODO: convert through raw bits instead?
363 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
363 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
364 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
364 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
365 let p2_info = self.flags().contains(Flags::P2_INFO);
365 let p2_info = self.flags().contains(Flags::P2_INFO);
366 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
366 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
367 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
367 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
368 {
368 {
369 Some((self.synthesize_unix_mode(), self.size.into()))
369 Some((self.synthesize_unix_mode(), self.size.into()))
370 } else {
370 } else {
371 None
371 None
372 };
372 };
373 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
373 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
374 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
374 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
375 // The current code is not able to do the more subtle comparison that the
375 // The current code is not able to do the more subtle comparison that the
376 // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
376 // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
377 && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
377 && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
378 {
378 {
379 // TODO: replace this by `self.mtime.try_into()?` to use
379 Some(self.mtime.try_into()?)
380 // sub-second precision from the file.
381 // We don’t do this yet because other parts of the code
382 // always set it to zero.
383 let mtime = TruncatedTimestamp::from_already_truncated(
384 self.mtime.truncated_seconds.get(),
385 0,
386 )?;
387 Some(mtime)
388 } else {
380 } else {
389 None
381 None
390 };
382 };
391 Ok(DirstateEntry::from_v2_data(
383 Ok(DirstateEntry::from_v2_data(
392 wdir_tracked,
384 wdir_tracked,
393 p1_tracked,
385 p1_tracked,
394 p2_info,
386 p2_info,
395 mode_size,
387 mode_size,
396 mtime,
388 mtime,
397 None,
389 None,
398 None,
390 None,
399 ))
391 ))
400 }
392 }
401
393
402 pub(super) fn entry(
394 pub(super) fn entry(
403 &self,
395 &self,
404 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
396 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
405 if self.has_entry() {
397 if self.has_entry() {
406 Ok(Some(self.assume_entry()?))
398 Ok(Some(self.assume_entry()?))
407 } else {
399 } else {
408 Ok(None)
400 Ok(None)
409 }
401 }
410 }
402 }
411
403
412 pub(super) fn children<'on_disk>(
404 pub(super) fn children<'on_disk>(
413 &self,
405 &self,
414 on_disk: &'on_disk [u8],
406 on_disk: &'on_disk [u8],
415 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
407 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
416 read_nodes(on_disk, self.children)
408 read_nodes(on_disk, self.children)
417 }
409 }
418
410
419 pub(super) fn to_in_memory_node<'on_disk>(
411 pub(super) fn to_in_memory_node<'on_disk>(
420 &self,
412 &self,
421 on_disk: &'on_disk [u8],
413 on_disk: &'on_disk [u8],
422 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
414 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
423 Ok(dirstate_map::Node {
415 Ok(dirstate_map::Node {
424 children: dirstate_map::ChildNodes::OnDisk(
416 children: dirstate_map::ChildNodes::OnDisk(
425 self.children(on_disk)?,
417 self.children(on_disk)?,
426 ),
418 ),
427 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
419 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
428 data: self.node_data()?,
420 data: self.node_data()?,
429 descendants_with_entry_count: self
421 descendants_with_entry_count: self
430 .descendants_with_entry_count
422 .descendants_with_entry_count
431 .get(),
423 .get(),
432 tracked_descendants_count: self.tracked_descendants_count.get(),
424 tracked_descendants_count: self.tracked_descendants_count.get(),
433 })
425 })
434 }
426 }
435
427
436 fn from_dirstate_entry(
428 fn from_dirstate_entry(
437 entry: &DirstateEntry,
429 entry: &DirstateEntry,
438 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
430 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
439 let (
431 let (
440 wdir_tracked,
432 wdir_tracked,
441 p1_tracked,
433 p1_tracked,
442 p2_info,
434 p2_info,
443 mode_size_opt,
435 mode_size_opt,
444 mtime_opt,
436 mtime_opt,
445 fallback_exec,
437 fallback_exec,
446 fallback_symlink,
438 fallback_symlink,
447 ) = entry.v2_data();
439 ) = entry.v2_data();
448 // TODO: convert throug raw flag bits instead?
440 // TODO: convert throug raw flag bits instead?
449 let mut flags = Flags::empty();
441 let mut flags = Flags::empty();
450 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
442 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
451 flags.set(Flags::P1_TRACKED, p1_tracked);
443 flags.set(Flags::P1_TRACKED, p1_tracked);
452 flags.set(Flags::P2_INFO, p2_info);
444 flags.set(Flags::P2_INFO, p2_info);
453 let size = if let Some((m, s)) = mode_size_opt {
445 let size = if let Some((m, s)) = mode_size_opt {
454 let exec_perm = m & libc::S_IXUSR != 0;
446 let exec_perm = m & libc::S_IXUSR != 0;
455 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
447 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
456 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
448 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
457 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
449 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
458 flags.insert(Flags::HAS_MODE_AND_SIZE);
450 flags.insert(Flags::HAS_MODE_AND_SIZE);
459 s.into()
451 s.into()
460 } else {
452 } else {
461 0.into()
453 0.into()
462 };
454 };
463 let mtime = if let Some(m) = mtime_opt {
455 let mtime = if let Some(m) = mtime_opt {
464 flags.insert(Flags::HAS_FILE_MTIME);
456 flags.insert(Flags::HAS_FILE_MTIME);
465 m.into()
457 m.into()
466 } else {
458 } else {
467 PackedTruncatedTimestamp::null()
459 PackedTruncatedTimestamp::null()
468 };
460 };
469 if let Some(f_exec) = fallback_exec {
461 if let Some(f_exec) = fallback_exec {
470 flags.insert(Flags::HAS_FALLBACK_EXEC);
462 flags.insert(Flags::HAS_FALLBACK_EXEC);
471 if f_exec {
463 if f_exec {
472 flags.insert(Flags::FALLBACK_EXEC);
464 flags.insert(Flags::FALLBACK_EXEC);
473 }
465 }
474 }
466 }
475 if let Some(f_symlink) = fallback_symlink {
467 if let Some(f_symlink) = fallback_symlink {
476 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
468 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
477 if f_symlink {
469 if f_symlink {
478 flags.insert(Flags::FALLBACK_SYMLINK);
470 flags.insert(Flags::FALLBACK_SYMLINK);
479 }
471 }
480 }
472 }
481 (flags, size, mtime)
473 (flags, size, mtime)
482 }
474 }
483 }
475 }
484
476
485 fn read_hg_path(
477 fn read_hg_path(
486 on_disk: &[u8],
478 on_disk: &[u8],
487 slice: PathSlice,
479 slice: PathSlice,
488 ) -> Result<&HgPath, DirstateV2ParseError> {
480 ) -> Result<&HgPath, DirstateV2ParseError> {
489 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
481 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
490 }
482 }
491
483
492 fn read_nodes(
484 fn read_nodes(
493 on_disk: &[u8],
485 on_disk: &[u8],
494 slice: ChildNodes,
486 slice: ChildNodes,
495 ) -> Result<&[Node], DirstateV2ParseError> {
487 ) -> Result<&[Node], DirstateV2ParseError> {
496 read_slice(on_disk, slice.start, slice.len.get())
488 read_slice(on_disk, slice.start, slice.len.get())
497 }
489 }
498
490
499 fn read_slice<T, Len>(
491 fn read_slice<T, Len>(
500 on_disk: &[u8],
492 on_disk: &[u8],
501 start: Offset,
493 start: Offset,
502 len: Len,
494 len: Len,
503 ) -> Result<&[T], DirstateV2ParseError>
495 ) -> Result<&[T], DirstateV2ParseError>
504 where
496 where
505 T: BytesCast,
497 T: BytesCast,
506 Len: TryInto<usize>,
498 Len: TryInto<usize>,
507 {
499 {
508 // Either `usize::MAX` would result in "out of bounds" error since a single
500 // Either `usize::MAX` would result in "out of bounds" error since a single
509 // `&[u8]` cannot occupy the entire addess space.
501 // `&[u8]` cannot occupy the entire addess space.
510 let start = start.get().try_into().unwrap_or(std::usize::MAX);
502 let start = start.get().try_into().unwrap_or(std::usize::MAX);
511 let len = len.try_into().unwrap_or(std::usize::MAX);
503 let len = len.try_into().unwrap_or(std::usize::MAX);
512 on_disk
504 on_disk
513 .get(start..)
505 .get(start..)
514 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
506 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
515 .map(|(slice, _rest)| slice)
507 .map(|(slice, _rest)| slice)
516 .ok_or_else(|| DirstateV2ParseError)
508 .ok_or_else(|| DirstateV2ParseError)
517 }
509 }
518
510
519 pub(crate) fn for_each_tracked_path<'on_disk>(
511 pub(crate) fn for_each_tracked_path<'on_disk>(
520 on_disk: &'on_disk [u8],
512 on_disk: &'on_disk [u8],
521 metadata: &[u8],
513 metadata: &[u8],
522 mut f: impl FnMut(&'on_disk HgPath),
514 mut f: impl FnMut(&'on_disk HgPath),
523 ) -> Result<(), DirstateV2ParseError> {
515 ) -> Result<(), DirstateV2ParseError> {
524 let (meta, _) = TreeMetadata::from_bytes(metadata)
516 let (meta, _) = TreeMetadata::from_bytes(metadata)
525 .map_err(|_| DirstateV2ParseError)?;
517 .map_err(|_| DirstateV2ParseError)?;
526 fn recur<'on_disk>(
518 fn recur<'on_disk>(
527 on_disk: &'on_disk [u8],
519 on_disk: &'on_disk [u8],
528 nodes: ChildNodes,
520 nodes: ChildNodes,
529 f: &mut impl FnMut(&'on_disk HgPath),
521 f: &mut impl FnMut(&'on_disk HgPath),
530 ) -> Result<(), DirstateV2ParseError> {
522 ) -> Result<(), DirstateV2ParseError> {
531 for node in read_nodes(on_disk, nodes)? {
523 for node in read_nodes(on_disk, nodes)? {
532 if let Some(entry) = node.entry()? {
524 if let Some(entry) = node.entry()? {
533 if entry.state().is_tracked() {
525 if entry.state().is_tracked() {
534 f(node.full_path(on_disk)?)
526 f(node.full_path(on_disk)?)
535 }
527 }
536 }
528 }
537 recur(on_disk, node.children, f)?
529 recur(on_disk, node.children, f)?
538 }
530 }
539 Ok(())
531 Ok(())
540 }
532 }
541 recur(on_disk, meta.root_nodes, &mut f)
533 recur(on_disk, meta.root_nodes, &mut f)
542 }
534 }
543
535
544 /// Returns new data and metadata, together with whether that data should be
536 /// Returns new data and metadata, together with whether that data should be
545 /// appended to the existing data file whose content is at
537 /// appended to the existing data file whose content is at
546 /// `dirstate_map.on_disk` (true), instead of written to a new data file
538 /// `dirstate_map.on_disk` (true), instead of written to a new data file
547 /// (false).
539 /// (false).
548 pub(super) fn write(
540 pub(super) fn write(
549 dirstate_map: &mut DirstateMap,
541 dirstate_map: &mut DirstateMap,
550 can_append: bool,
542 can_append: bool,
551 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
543 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
552 let append = can_append && dirstate_map.write_should_append();
544 let append = can_append && dirstate_map.write_should_append();
553
545
554 // This ignores the space for paths, and for nodes without an entry.
546 // This ignores the space for paths, and for nodes without an entry.
555 // TODO: better estimate? Skip the `Vec` and write to a file directly?
547 // TODO: better estimate? Skip the `Vec` and write to a file directly?
556 let size_guess = std::mem::size_of::<Node>()
548 let size_guess = std::mem::size_of::<Node>()
557 * dirstate_map.nodes_with_entry_count as usize;
549 * dirstate_map.nodes_with_entry_count as usize;
558
550
559 let mut writer = Writer {
551 let mut writer = Writer {
560 dirstate_map,
552 dirstate_map,
561 append,
553 append,
562 out: Vec::with_capacity(size_guess),
554 out: Vec::with_capacity(size_guess),
563 };
555 };
564
556
565 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
557 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
566
558
567 let meta = TreeMetadata {
559 let meta = TreeMetadata {
568 root_nodes,
560 root_nodes,
569 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
561 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
570 nodes_with_copy_source_count: dirstate_map
562 nodes_with_copy_source_count: dirstate_map
571 .nodes_with_copy_source_count
563 .nodes_with_copy_source_count
572 .into(),
564 .into(),
573 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
565 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
574 unused: [0; 4],
566 unused: [0; 4],
575 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
567 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
576 };
568 };
577 Ok((writer.out, meta.as_bytes().to_vec(), append))
569 Ok((writer.out, meta.as_bytes().to_vec(), append))
578 }
570 }
579
571
580 struct Writer<'dmap, 'on_disk> {
572 struct Writer<'dmap, 'on_disk> {
581 dirstate_map: &'dmap DirstateMap<'on_disk>,
573 dirstate_map: &'dmap DirstateMap<'on_disk>,
582 append: bool,
574 append: bool,
583 out: Vec<u8>,
575 out: Vec<u8>,
584 }
576 }
585
577
586 impl Writer<'_, '_> {
578 impl Writer<'_, '_> {
587 fn write_nodes(
579 fn write_nodes(
588 &mut self,
580 &mut self,
589 nodes: dirstate_map::ChildNodesRef,
581 nodes: dirstate_map::ChildNodesRef,
590 ) -> Result<ChildNodes, DirstateError> {
582 ) -> Result<ChildNodes, DirstateError> {
591 // Reuse already-written nodes if possible
583 // Reuse already-written nodes if possible
592 if self.append {
584 if self.append {
593 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
585 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
594 let start = self.on_disk_offset_of(nodes_slice).expect(
586 let start = self.on_disk_offset_of(nodes_slice).expect(
595 "dirstate-v2 OnDisk nodes not found within on_disk",
587 "dirstate-v2 OnDisk nodes not found within on_disk",
596 );
588 );
597 let len = child_nodes_len_from_usize(nodes_slice.len());
589 let len = child_nodes_len_from_usize(nodes_slice.len());
598 return Ok(ChildNodes { start, len });
590 return Ok(ChildNodes { start, len });
599 }
591 }
600 }
592 }
601
593
602 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
594 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
603 // undefined iteration order. Sort to enable binary search in the
595 // undefined iteration order. Sort to enable binary search in the
604 // written file.
596 // written file.
605 let nodes = nodes.sorted();
597 let nodes = nodes.sorted();
606 let nodes_len = nodes.len();
598 let nodes_len = nodes.len();
607
599
608 // First accumulate serialized nodes in a `Vec`
600 // First accumulate serialized nodes in a `Vec`
609 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
601 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
610 for node in nodes {
602 for node in nodes {
611 let children =
603 let children =
612 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
604 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
613 let full_path = node.full_path(self.dirstate_map.on_disk)?;
605 let full_path = node.full_path(self.dirstate_map.on_disk)?;
614 let full_path = self.write_path(full_path.as_bytes());
606 let full_path = self.write_path(full_path.as_bytes());
615 let copy_source = if let Some(source) =
607 let copy_source = if let Some(source) =
616 node.copy_source(self.dirstate_map.on_disk)?
608 node.copy_source(self.dirstate_map.on_disk)?
617 {
609 {
618 self.write_path(source.as_bytes())
610 self.write_path(source.as_bytes())
619 } else {
611 } else {
620 PathSlice {
612 PathSlice {
621 start: 0.into(),
613 start: 0.into(),
622 len: 0.into(),
614 len: 0.into(),
623 }
615 }
624 };
616 };
625 on_disk_nodes.push(match node {
617 on_disk_nodes.push(match node {
626 NodeRef::InMemory(path, node) => {
618 NodeRef::InMemory(path, node) => {
627 let (flags, size, mtime) = match &node.data {
619 let (flags, size, mtime) = match &node.data {
628 dirstate_map::NodeData::Entry(entry) => {
620 dirstate_map::NodeData::Entry(entry) => {
629 Node::from_dirstate_entry(entry)
621 Node::from_dirstate_entry(entry)
630 }
622 }
631 dirstate_map::NodeData::CachedDirectory { mtime } => (
623 dirstate_map::NodeData::CachedDirectory { mtime } => (
632 // we currently never set a mtime if unknown file
624 // we currently never set a mtime if unknown file
633 // are present.
625 // are present.
634 // So if we have a mtime for a directory, we know
626 // So if we have a mtime for a directory, we know
635 // they are no unknown
627 // they are no unknown
636 // files and we
628 // files and we
637 // blindly set ALL_UNKNOWN_RECORDED.
629 // blindly set ALL_UNKNOWN_RECORDED.
638 //
630 //
639 // We never set ALL_IGNORED_RECORDED since we
631 // We never set ALL_IGNORED_RECORDED since we
640 // don't track that case
632 // don't track that case
641 // currently.
633 // currently.
642 Flags::HAS_DIRECTORY_MTIME
634 Flags::HAS_DIRECTORY_MTIME
643 | Flags::ALL_UNKNOWN_RECORDED,
635 | Flags::ALL_UNKNOWN_RECORDED,
644 0.into(),
636 0.into(),
645 (*mtime).into(),
637 (*mtime).into(),
646 ),
638 ),
647 dirstate_map::NodeData::None => (
639 dirstate_map::NodeData::None => (
648 Flags::empty(),
640 Flags::empty(),
649 0.into(),
641 0.into(),
650 PackedTruncatedTimestamp::null(),
642 PackedTruncatedTimestamp::null(),
651 ),
643 ),
652 };
644 };
653 Node {
645 Node {
654 children,
646 children,
655 copy_source,
647 copy_source,
656 full_path,
648 full_path,
657 base_name_start: u16::try_from(path.base_name_start())
649 base_name_start: u16::try_from(path.base_name_start())
658 // Could only panic for paths over 64 KiB
650 // Could only panic for paths over 64 KiB
659 .expect("dirstate-v2 path length overflow")
651 .expect("dirstate-v2 path length overflow")
660 .into(),
652 .into(),
661 descendants_with_entry_count: node
653 descendants_with_entry_count: node
662 .descendants_with_entry_count
654 .descendants_with_entry_count
663 .into(),
655 .into(),
664 tracked_descendants_count: node
656 tracked_descendants_count: node
665 .tracked_descendants_count
657 .tracked_descendants_count
666 .into(),
658 .into(),
667 flags: flags.bits().into(),
659 flags: flags.bits().into(),
668 size,
660 size,
669 mtime,
661 mtime,
670 }
662 }
671 }
663 }
672 NodeRef::OnDisk(node) => Node {
664 NodeRef::OnDisk(node) => Node {
673 children,
665 children,
674 copy_source,
666 copy_source,
675 full_path,
667 full_path,
676 ..*node
668 ..*node
677 },
669 },
678 })
670 })
679 }
671 }
680 // … so we can write them contiguously, after writing everything else
672 // … so we can write them contiguously, after writing everything else
681 // they refer to.
673 // they refer to.
682 let start = self.current_offset();
674 let start = self.current_offset();
683 let len = child_nodes_len_from_usize(nodes_len);
675 let len = child_nodes_len_from_usize(nodes_len);
684 self.out.extend(on_disk_nodes.as_bytes());
676 self.out.extend(on_disk_nodes.as_bytes());
685 Ok(ChildNodes { start, len })
677 Ok(ChildNodes { start, len })
686 }
678 }
687
679
688 /// If the given slice of items is within `on_disk`, returns its offset
680 /// If the given slice of items is within `on_disk`, returns its offset
689 /// from the start of `on_disk`.
681 /// from the start of `on_disk`.
690 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
682 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
691 where
683 where
692 T: BytesCast,
684 T: BytesCast,
693 {
685 {
694 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
686 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
695 let start = slice.as_ptr() as usize;
687 let start = slice.as_ptr() as usize;
696 let end = start + slice.len();
688 let end = start + slice.len();
697 start..=end
689 start..=end
698 }
690 }
699 let slice_addresses = address_range(slice.as_bytes());
691 let slice_addresses = address_range(slice.as_bytes());
700 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
692 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
701 if on_disk_addresses.contains(slice_addresses.start())
693 if on_disk_addresses.contains(slice_addresses.start())
702 && on_disk_addresses.contains(slice_addresses.end())
694 && on_disk_addresses.contains(slice_addresses.end())
703 {
695 {
704 let offset = slice_addresses.start() - on_disk_addresses.start();
696 let offset = slice_addresses.start() - on_disk_addresses.start();
705 Some(offset_from_usize(offset))
697 Some(offset_from_usize(offset))
706 } else {
698 } else {
707 None
699 None
708 }
700 }
709 }
701 }
710
702
711 fn current_offset(&mut self) -> Offset {
703 fn current_offset(&mut self) -> Offset {
712 let mut offset = self.out.len();
704 let mut offset = self.out.len();
713 if self.append {
705 if self.append {
714 offset += self.dirstate_map.on_disk.len()
706 offset += self.dirstate_map.on_disk.len()
715 }
707 }
716 offset_from_usize(offset)
708 offset_from_usize(offset)
717 }
709 }
718
710
719 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
711 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
720 let len = path_len_from_usize(slice.len());
712 let len = path_len_from_usize(slice.len());
721 // Reuse an already-written path if possible
713 // Reuse an already-written path if possible
722 if self.append {
714 if self.append {
723 if let Some(start) = self.on_disk_offset_of(slice) {
715 if let Some(start) = self.on_disk_offset_of(slice) {
724 return PathSlice { start, len };
716 return PathSlice { start, len };
725 }
717 }
726 }
718 }
727 let start = self.current_offset();
719 let start = self.current_offset();
728 self.out.extend(slice.as_bytes());
720 self.out.extend(slice.as_bytes());
729 PathSlice { start, len }
721 PathSlice { start, len }
730 }
722 }
731 }
723 }
732
724
733 fn offset_from_usize(x: usize) -> Offset {
725 fn offset_from_usize(x: usize) -> Offset {
734 u32::try_from(x)
726 u32::try_from(x)
735 // Could only panic for a dirstate file larger than 4 GiB
727 // Could only panic for a dirstate file larger than 4 GiB
736 .expect("dirstate-v2 offset overflow")
728 .expect("dirstate-v2 offset overflow")
737 .into()
729 .into()
738 }
730 }
739
731
740 fn child_nodes_len_from_usize(x: usize) -> Size {
732 fn child_nodes_len_from_usize(x: usize) -> Size {
741 u32::try_from(x)
733 u32::try_from(x)
742 // Could only panic with over 4 billion nodes
734 // Could only panic with over 4 billion nodes
743 .expect("dirstate-v2 slice length overflow")
735 .expect("dirstate-v2 slice length overflow")
744 .into()
736 .into()
745 }
737 }
746
738
747 fn path_len_from_usize(x: usize) -> PathSize {
739 fn path_len_from_usize(x: usize) -> PathSize {
748 u16::try_from(x)
740 u16::try_from(x)
749 // Could only panic for paths over 64 KiB
741 // Could only panic for paths over 64 KiB
750 .expect("dirstate-v2 path length overflow")
742 .expect("dirstate-v2 path length overflow")
751 .into()
743 .into()
752 }
744 }
753
745
754 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
746 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
755 fn from(timestamp: TruncatedTimestamp) -> Self {
747 fn from(timestamp: TruncatedTimestamp) -> Self {
756 Self {
748 Self {
757 truncated_seconds: timestamp.truncated_seconds().into(),
749 truncated_seconds: timestamp.truncated_seconds().into(),
758 nanoseconds: timestamp.nanoseconds().into(),
750 nanoseconds: timestamp.nanoseconds().into(),
759 }
751 }
760 }
752 }
761 }
753 }
762
754
763 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
755 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
764 type Error = DirstateV2ParseError;
756 type Error = DirstateV2ParseError;
765
757
766 fn try_from(
758 fn try_from(
767 timestamp: PackedTruncatedTimestamp,
759 timestamp: PackedTruncatedTimestamp,
768 ) -> Result<Self, Self::Error> {
760 ) -> Result<Self, Self::Error> {
769 Self::from_already_truncated(
761 Self::from_already_truncated(
770 timestamp.truncated_seconds.get(),
762 timestamp.truncated_seconds.get(),
771 timestamp.nanoseconds.get(),
763 timestamp.nanoseconds.get(),
772 )
764 )
773 }
765 }
774 }
766 }
775 impl PackedTruncatedTimestamp {
767 impl PackedTruncatedTimestamp {
776 fn null() -> Self {
768 fn null() -> Self {
777 Self {
769 Self {
778 truncated_seconds: 0.into(),
770 truncated_seconds: 0.into(),
779 nanoseconds: 0.into(),
771 nanoseconds: 0.into(),
780 }
772 }
781 }
773 }
782 }
774 }
@@ -1,758 +1,756
1 use crate::dirstate::entry::TruncatedTimestamp;
1 use crate::dirstate::entry::TruncatedTimestamp;
2 use crate::dirstate::status::IgnoreFnType;
2 use crate::dirstate::status::IgnoreFnType;
3 use crate::dirstate_tree::dirstate_map::BorrowedPath;
3 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
4 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 use crate::dirstate_tree::dirstate_map::DirstateMap;
5 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 use crate::dirstate_tree::dirstate_map::NodeData;
6 use crate::dirstate_tree::dirstate_map::NodeData;
7 use crate::dirstate_tree::dirstate_map::NodeRef;
7 use crate::dirstate_tree::dirstate_map::NodeRef;
8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 use crate::matchers::get_ignore_function;
9 use crate::matchers::get_ignore_function;
10 use crate::matchers::Matcher;
10 use crate::matchers::Matcher;
11 use crate::utils::files::get_bytes_from_os_string;
11 use crate::utils::files::get_bytes_from_os_string;
12 use crate::utils::files::get_path_from_bytes;
12 use crate::utils::files::get_path_from_bytes;
13 use crate::utils::hg_path::HgPath;
13 use crate::utils::hg_path::HgPath;
14 use crate::BadMatch;
14 use crate::BadMatch;
15 use crate::DirstateStatus;
15 use crate::DirstateStatus;
16 use crate::EntryState;
16 use crate::EntryState;
17 use crate::HgPathBuf;
17 use crate::HgPathBuf;
18 use crate::PatternFileWarning;
18 use crate::PatternFileWarning;
19 use crate::StatusError;
19 use crate::StatusError;
20 use crate::StatusOptions;
20 use crate::StatusOptions;
21 use micro_timer::timed;
21 use micro_timer::timed;
22 use rayon::prelude::*;
22 use rayon::prelude::*;
23 use sha1::{Digest, Sha1};
23 use sha1::{Digest, Sha1};
24 use std::borrow::Cow;
24 use std::borrow::Cow;
25 use std::io;
25 use std::io;
26 use std::path::Path;
26 use std::path::Path;
27 use std::path::PathBuf;
27 use std::path::PathBuf;
28 use std::sync::Mutex;
28 use std::sync::Mutex;
29 use std::time::SystemTime;
29 use std::time::SystemTime;
30
30
31 /// Returns the status of the working directory compared to its parent
31 /// Returns the status of the working directory compared to its parent
32 /// changeset.
32 /// changeset.
33 ///
33 ///
34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
35 /// and variable names) and dirstate tree at the same time. The core of this
35 /// and variable names) and dirstate tree at the same time. The core of this
36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
36 /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
37 /// and its use of `itertools::merge_join_by`. When reaching a path that only
38 /// exists in one of the two trees, depending on information requested by
38 /// exists in one of the two trees, depending on information requested by
39 /// `options` we may need to traverse the remaining subtree.
39 /// `options` we may need to traverse the remaining subtree.
40 #[timed]
40 #[timed]
41 pub fn status<'tree, 'on_disk: 'tree>(
41 pub fn status<'tree, 'on_disk: 'tree>(
42 dmap: &'tree mut DirstateMap<'on_disk>,
42 dmap: &'tree mut DirstateMap<'on_disk>,
43 matcher: &(dyn Matcher + Sync),
43 matcher: &(dyn Matcher + Sync),
44 root_dir: PathBuf,
44 root_dir: PathBuf,
45 ignore_files: Vec<PathBuf>,
45 ignore_files: Vec<PathBuf>,
46 options: StatusOptions,
46 options: StatusOptions,
47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
47 ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
48 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
49 if options.list_ignored || options.list_unknown {
49 if options.list_ignored || options.list_unknown {
50 let mut hasher = Sha1::new();
50 let mut hasher = Sha1::new();
51 let (ignore_fn, warnings) = get_ignore_function(
51 let (ignore_fn, warnings) = get_ignore_function(
52 ignore_files,
52 ignore_files,
53 &root_dir,
53 &root_dir,
54 &mut |pattern_bytes| hasher.update(pattern_bytes),
54 &mut |pattern_bytes| hasher.update(pattern_bytes),
55 )?;
55 )?;
56 let new_hash = *hasher.finalize().as_ref();
56 let new_hash = *hasher.finalize().as_ref();
57 let changed = new_hash != dmap.ignore_patterns_hash;
57 let changed = new_hash != dmap.ignore_patterns_hash;
58 dmap.ignore_patterns_hash = new_hash;
58 dmap.ignore_patterns_hash = new_hash;
59 (ignore_fn, warnings, Some(changed))
59 (ignore_fn, warnings, Some(changed))
60 } else {
60 } else {
61 (Box::new(|&_| true), vec![], None)
61 (Box::new(|&_| true), vec![], None)
62 };
62 };
63
63
64 let common = StatusCommon {
64 let common = StatusCommon {
65 dmap,
65 dmap,
66 options,
66 options,
67 matcher,
67 matcher,
68 ignore_fn,
68 ignore_fn,
69 outcome: Default::default(),
69 outcome: Default::default(),
70 ignore_patterns_have_changed: patterns_changed,
70 ignore_patterns_have_changed: patterns_changed,
71 new_cachable_directories: Default::default(),
71 new_cachable_directories: Default::default(),
72 outated_cached_directories: Default::default(),
72 outated_cached_directories: Default::default(),
73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
73 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
74 };
74 };
75 let is_at_repo_root = true;
75 let is_at_repo_root = true;
76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
76 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
77 let has_ignored_ancestor = false;
77 let has_ignored_ancestor = false;
78 let root_cached_mtime = None;
78 let root_cached_mtime = None;
79 let root_dir_metadata = None;
79 let root_dir_metadata = None;
80 // If the path we have for the repository root is a symlink, do follow it.
80 // If the path we have for the repository root is a symlink, do follow it.
81 // (As opposed to symlinks within the working directory which are not
81 // (As opposed to symlinks within the working directory which are not
82 // followed, using `std::fs::symlink_metadata`.)
82 // followed, using `std::fs::symlink_metadata`.)
83 common.traverse_fs_directory_and_dirstate(
83 common.traverse_fs_directory_and_dirstate(
84 has_ignored_ancestor,
84 has_ignored_ancestor,
85 dmap.root.as_ref(),
85 dmap.root.as_ref(),
86 hg_path,
86 hg_path,
87 &root_dir,
87 &root_dir,
88 root_dir_metadata,
88 root_dir_metadata,
89 root_cached_mtime,
89 root_cached_mtime,
90 is_at_repo_root,
90 is_at_repo_root,
91 )?;
91 )?;
92 let mut outcome = common.outcome.into_inner().unwrap();
92 let mut outcome = common.outcome.into_inner().unwrap();
93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
93 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
94 let outdated = common.outated_cached_directories.into_inner().unwrap();
94 let outdated = common.outated_cached_directories.into_inner().unwrap();
95
95
96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
96 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
97 || !outdated.is_empty()
97 || !outdated.is_empty()
98 || !new_cachable.is_empty();
98 || !new_cachable.is_empty();
99
99
100 // Remove outdated mtimes before adding new mtimes, in case a given
100 // Remove outdated mtimes before adding new mtimes, in case a given
101 // directory is both
101 // directory is both
102 for path in &outdated {
102 for path in &outdated {
103 let node = dmap.get_or_insert(path)?;
103 let node = dmap.get_or_insert(path)?;
104 if let NodeData::CachedDirectory { .. } = &node.data {
104 if let NodeData::CachedDirectory { .. } = &node.data {
105 node.data = NodeData::None
105 node.data = NodeData::None
106 }
106 }
107 }
107 }
108 for (path, mtime) in &new_cachable {
108 for (path, mtime) in &new_cachable {
109 let node = dmap.get_or_insert(path)?;
109 let node = dmap.get_or_insert(path)?;
110 match &node.data {
110 match &node.data {
111 NodeData::Entry(_) => {} // Don’t overwrite an entry
111 NodeData::Entry(_) => {} // Don’t overwrite an entry
112 NodeData::CachedDirectory { .. } | NodeData::None => {
112 NodeData::CachedDirectory { .. } | NodeData::None => {
113 node.data = NodeData::CachedDirectory { mtime: *mtime }
113 node.data = NodeData::CachedDirectory { mtime: *mtime }
114 }
114 }
115 }
115 }
116 }
116 }
117
117
118 Ok((outcome, warnings))
118 Ok((outcome, warnings))
119 }
119 }
120
120
121 /// Bag of random things needed by various parts of the algorithm. Reduces the
121 /// Bag of random things needed by various parts of the algorithm. Reduces the
122 /// number of parameters passed to functions.
122 /// number of parameters passed to functions.
123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
123 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
124 dmap: &'tree DirstateMap<'on_disk>,
124 dmap: &'tree DirstateMap<'on_disk>,
125 options: StatusOptions,
125 options: StatusOptions,
126 matcher: &'a (dyn Matcher + Sync),
126 matcher: &'a (dyn Matcher + Sync),
127 ignore_fn: IgnoreFnType<'a>,
127 ignore_fn: IgnoreFnType<'a>,
128 outcome: Mutex<DirstateStatus<'on_disk>>,
128 outcome: Mutex<DirstateStatus<'on_disk>>,
129 new_cachable_directories:
129 new_cachable_directories:
130 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
130 Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
131 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
131 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
132
132
133 /// Whether ignore files like `.hgignore` have changed since the previous
133 /// Whether ignore files like `.hgignore` have changed since the previous
134 /// time a `status()` call wrote their hash to the dirstate. `None` means
134 /// time a `status()` call wrote their hash to the dirstate. `None` means
135 /// we don’t know as this run doesn’t list either ignored or uknown files
135 /// we don’t know as this run doesn’t list either ignored or uknown files
136 /// and therefore isn’t reading `.hgignore`.
136 /// and therefore isn’t reading `.hgignore`.
137 ignore_patterns_have_changed: Option<bool>,
137 ignore_patterns_have_changed: Option<bool>,
138
138
139 /// The current time at the start of the `status()` algorithm, as measured
139 /// The current time at the start of the `status()` algorithm, as measured
140 /// and possibly truncated by the filesystem.
140 /// and possibly truncated by the filesystem.
141 filesystem_time_at_status_start: Option<SystemTime>,
141 filesystem_time_at_status_start: Option<SystemTime>,
142 }
142 }
143
143
144 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
144 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
145 fn read_dir(
145 fn read_dir(
146 &self,
146 &self,
147 hg_path: &HgPath,
147 hg_path: &HgPath,
148 fs_path: &Path,
148 fs_path: &Path,
149 is_at_repo_root: bool,
149 is_at_repo_root: bool,
150 ) -> Result<Vec<DirEntry>, ()> {
150 ) -> Result<Vec<DirEntry>, ()> {
151 DirEntry::read_dir(fs_path, is_at_repo_root)
151 DirEntry::read_dir(fs_path, is_at_repo_root)
152 .map_err(|error| self.io_error(error, hg_path))
152 .map_err(|error| self.io_error(error, hg_path))
153 }
153 }
154
154
155 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
155 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
156 let errno = error.raw_os_error().expect("expected real OS error");
156 let errno = error.raw_os_error().expect("expected real OS error");
157 self.outcome
157 self.outcome
158 .lock()
158 .lock()
159 .unwrap()
159 .unwrap()
160 .bad
160 .bad
161 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
161 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
162 }
162 }
163
163
164 fn check_for_outdated_directory_cache(
164 fn check_for_outdated_directory_cache(
165 &self,
165 &self,
166 dirstate_node: &NodeRef<'tree, 'on_disk>,
166 dirstate_node: &NodeRef<'tree, 'on_disk>,
167 ) -> Result<(), DirstateV2ParseError> {
167 ) -> Result<(), DirstateV2ParseError> {
168 if self.ignore_patterns_have_changed == Some(true)
168 if self.ignore_patterns_have_changed == Some(true)
169 && dirstate_node.cached_directory_mtime()?.is_some()
169 && dirstate_node.cached_directory_mtime()?.is_some()
170 {
170 {
171 self.outated_cached_directories.lock().unwrap().push(
171 self.outated_cached_directories.lock().unwrap().push(
172 dirstate_node
172 dirstate_node
173 .full_path_borrowed(self.dmap.on_disk)?
173 .full_path_borrowed(self.dmap.on_disk)?
174 .detach_from_tree(),
174 .detach_from_tree(),
175 )
175 )
176 }
176 }
177 Ok(())
177 Ok(())
178 }
178 }
179
179
180 /// If this returns true, we can get accurate results by only using
180 /// If this returns true, we can get accurate results by only using
181 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
181 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
182 /// need to call `read_dir`.
182 /// need to call `read_dir`.
183 fn can_skip_fs_readdir(
183 fn can_skip_fs_readdir(
184 &self,
184 &self,
185 directory_metadata: Option<&std::fs::Metadata>,
185 directory_metadata: Option<&std::fs::Metadata>,
186 cached_directory_mtime: Option<TruncatedTimestamp>,
186 cached_directory_mtime: Option<TruncatedTimestamp>,
187 ) -> bool {
187 ) -> bool {
188 if !self.options.list_unknown && !self.options.list_ignored {
188 if !self.options.list_unknown && !self.options.list_ignored {
189 // All states that we care about listing have corresponding
189 // All states that we care about listing have corresponding
190 // dirstate entries.
190 // dirstate entries.
191 // This happens for example with `hg status -mard`.
191 // This happens for example with `hg status -mard`.
192 return true;
192 return true;
193 }
193 }
194 if !self.options.list_ignored
194 if !self.options.list_ignored
195 && self.ignore_patterns_have_changed == Some(false)
195 && self.ignore_patterns_have_changed == Some(false)
196 {
196 {
197 if let Some(cached_mtime) = cached_directory_mtime {
197 if let Some(cached_mtime) = cached_directory_mtime {
198 // The dirstate contains a cached mtime for this directory, set
198 // The dirstate contains a cached mtime for this directory, set
199 // by a previous run of the `status` algorithm which found this
199 // by a previous run of the `status` algorithm which found this
200 // directory eligible for `read_dir` caching.
200 // directory eligible for `read_dir` caching.
201 if let Some(meta) = directory_metadata {
201 if let Some(meta) = directory_metadata {
202 if cached_mtime
202 if cached_mtime
203 .likely_equal_to_mtime_of(meta)
203 .likely_equal_to_mtime_of(meta)
204 .unwrap_or(false)
204 .unwrap_or(false)
205 {
205 {
206 // The mtime of that directory has not changed
206 // The mtime of that directory has not changed
207 // since then, which means that the results of
207 // since then, which means that the results of
208 // `read_dir` should also be unchanged.
208 // `read_dir` should also be unchanged.
209 return true;
209 return true;
210 }
210 }
211 }
211 }
212 }
212 }
213 }
213 }
214 false
214 false
215 }
215 }
216
216
217 /// Returns whether all child entries of the filesystem directory have a
217 /// Returns whether all child entries of the filesystem directory have a
218 /// corresponding dirstate node or are ignored.
218 /// corresponding dirstate node or are ignored.
219 fn traverse_fs_directory_and_dirstate(
219 fn traverse_fs_directory_and_dirstate(
220 &self,
220 &self,
221 has_ignored_ancestor: bool,
221 has_ignored_ancestor: bool,
222 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
222 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
223 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
223 directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
224 directory_fs_path: &Path,
224 directory_fs_path: &Path,
225 directory_metadata: Option<&std::fs::Metadata>,
225 directory_metadata: Option<&std::fs::Metadata>,
226 cached_directory_mtime: Option<TruncatedTimestamp>,
226 cached_directory_mtime: Option<TruncatedTimestamp>,
227 is_at_repo_root: bool,
227 is_at_repo_root: bool,
228 ) -> Result<bool, DirstateV2ParseError> {
228 ) -> Result<bool, DirstateV2ParseError> {
229 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
229 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
230 {
230 {
231 dirstate_nodes
231 dirstate_nodes
232 .par_iter()
232 .par_iter()
233 .map(|dirstate_node| {
233 .map(|dirstate_node| {
234 let fs_path = directory_fs_path.join(get_path_from_bytes(
234 let fs_path = directory_fs_path.join(get_path_from_bytes(
235 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
235 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
236 ));
236 ));
237 match std::fs::symlink_metadata(&fs_path) {
237 match std::fs::symlink_metadata(&fs_path) {
238 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
238 Ok(fs_metadata) => self.traverse_fs_and_dirstate(
239 &fs_path,
239 &fs_path,
240 &fs_metadata,
240 &fs_metadata,
241 dirstate_node,
241 dirstate_node,
242 has_ignored_ancestor,
242 has_ignored_ancestor,
243 ),
243 ),
244 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
244 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
245 self.traverse_dirstate_only(dirstate_node)
245 self.traverse_dirstate_only(dirstate_node)
246 }
246 }
247 Err(error) => {
247 Err(error) => {
248 let hg_path =
248 let hg_path =
249 dirstate_node.full_path(self.dmap.on_disk)?;
249 dirstate_node.full_path(self.dmap.on_disk)?;
250 Ok(self.io_error(error, hg_path))
250 Ok(self.io_error(error, hg_path))
251 }
251 }
252 }
252 }
253 })
253 })
254 .collect::<Result<_, _>>()?;
254 .collect::<Result<_, _>>()?;
255
255
256 // We don’t know, so conservatively say this isn’t the case
256 // We don’t know, so conservatively say this isn’t the case
257 let children_all_have_dirstate_node_or_are_ignored = false;
257 let children_all_have_dirstate_node_or_are_ignored = false;
258
258
259 return Ok(children_all_have_dirstate_node_or_are_ignored);
259 return Ok(children_all_have_dirstate_node_or_are_ignored);
260 }
260 }
261
261
262 let mut fs_entries = if let Ok(entries) = self.read_dir(
262 let mut fs_entries = if let Ok(entries) = self.read_dir(
263 directory_hg_path,
263 directory_hg_path,
264 directory_fs_path,
264 directory_fs_path,
265 is_at_repo_root,
265 is_at_repo_root,
266 ) {
266 ) {
267 entries
267 entries
268 } else {
268 } else {
269 // Treat an unreadable directory (typically because of insufficient
269 // Treat an unreadable directory (typically because of insufficient
270 // permissions) like an empty directory. `self.read_dir` has
270 // permissions) like an empty directory. `self.read_dir` has
271 // already called `self.io_error` so a warning will be emitted.
271 // already called `self.io_error` so a warning will be emitted.
272 Vec::new()
272 Vec::new()
273 };
273 };
274
274
275 // `merge_join_by` requires both its input iterators to be sorted:
275 // `merge_join_by` requires both its input iterators to be sorted:
276
276
277 let dirstate_nodes = dirstate_nodes.sorted();
277 let dirstate_nodes = dirstate_nodes.sorted();
278 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
278 // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
279 // https://github.com/rust-lang/rust/issues/34162
279 // https://github.com/rust-lang/rust/issues/34162
280 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
280 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
281
281
282 // Propagate here any error that would happen inside the comparison
282 // Propagate here any error that would happen inside the comparison
283 // callback below
283 // callback below
284 for dirstate_node in &dirstate_nodes {
284 for dirstate_node in &dirstate_nodes {
285 dirstate_node.base_name(self.dmap.on_disk)?;
285 dirstate_node.base_name(self.dmap.on_disk)?;
286 }
286 }
287 itertools::merge_join_by(
287 itertools::merge_join_by(
288 dirstate_nodes,
288 dirstate_nodes,
289 &fs_entries,
289 &fs_entries,
290 |dirstate_node, fs_entry| {
290 |dirstate_node, fs_entry| {
291 // This `unwrap` never panics because we already propagated
291 // This `unwrap` never panics because we already propagated
292 // those errors above
292 // those errors above
293 dirstate_node
293 dirstate_node
294 .base_name(self.dmap.on_disk)
294 .base_name(self.dmap.on_disk)
295 .unwrap()
295 .unwrap()
296 .cmp(&fs_entry.base_name)
296 .cmp(&fs_entry.base_name)
297 },
297 },
298 )
298 )
299 .par_bridge()
299 .par_bridge()
300 .map(|pair| {
300 .map(|pair| {
301 use itertools::EitherOrBoth::*;
301 use itertools::EitherOrBoth::*;
302 let has_dirstate_node_or_is_ignored;
302 let has_dirstate_node_or_is_ignored;
303 match pair {
303 match pair {
304 Both(dirstate_node, fs_entry) => {
304 Both(dirstate_node, fs_entry) => {
305 self.traverse_fs_and_dirstate(
305 self.traverse_fs_and_dirstate(
306 &fs_entry.full_path,
306 &fs_entry.full_path,
307 &fs_entry.metadata,
307 &fs_entry.metadata,
308 dirstate_node,
308 dirstate_node,
309 has_ignored_ancestor,
309 has_ignored_ancestor,
310 )?;
310 )?;
311 has_dirstate_node_or_is_ignored = true
311 has_dirstate_node_or_is_ignored = true
312 }
312 }
313 Left(dirstate_node) => {
313 Left(dirstate_node) => {
314 self.traverse_dirstate_only(dirstate_node)?;
314 self.traverse_dirstate_only(dirstate_node)?;
315 has_dirstate_node_or_is_ignored = true;
315 has_dirstate_node_or_is_ignored = true;
316 }
316 }
317 Right(fs_entry) => {
317 Right(fs_entry) => {
318 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
318 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
319 has_ignored_ancestor,
319 has_ignored_ancestor,
320 directory_hg_path,
320 directory_hg_path,
321 fs_entry,
321 fs_entry,
322 )
322 )
323 }
323 }
324 }
324 }
325 Ok(has_dirstate_node_or_is_ignored)
325 Ok(has_dirstate_node_or_is_ignored)
326 })
326 })
327 .try_reduce(|| true, |a, b| Ok(a && b))
327 .try_reduce(|| true, |a, b| Ok(a && b))
328 }
328 }
329
329
330 fn traverse_fs_and_dirstate(
330 fn traverse_fs_and_dirstate(
331 &self,
331 &self,
332 fs_path: &Path,
332 fs_path: &Path,
333 fs_metadata: &std::fs::Metadata,
333 fs_metadata: &std::fs::Metadata,
334 dirstate_node: NodeRef<'tree, 'on_disk>,
334 dirstate_node: NodeRef<'tree, 'on_disk>,
335 has_ignored_ancestor: bool,
335 has_ignored_ancestor: bool,
336 ) -> Result<(), DirstateV2ParseError> {
336 ) -> Result<(), DirstateV2ParseError> {
337 self.check_for_outdated_directory_cache(&dirstate_node)?;
337 self.check_for_outdated_directory_cache(&dirstate_node)?;
338 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
338 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
339 let file_type = fs_metadata.file_type();
339 let file_type = fs_metadata.file_type();
340 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
340 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
341 if !file_or_symlink {
341 if !file_or_symlink {
342 // If we previously had a file here, it was removed (with
342 // If we previously had a file here, it was removed (with
343 // `hg rm` or similar) or deleted before it could be
343 // `hg rm` or similar) or deleted before it could be
344 // replaced by a directory or something else.
344 // replaced by a directory or something else.
345 self.mark_removed_or_deleted_if_file(
345 self.mark_removed_or_deleted_if_file(
346 &hg_path,
346 &hg_path,
347 dirstate_node.state()?,
347 dirstate_node.state()?,
348 );
348 );
349 }
349 }
350 if file_type.is_dir() {
350 if file_type.is_dir() {
351 if self.options.collect_traversed_dirs {
351 if self.options.collect_traversed_dirs {
352 self.outcome
352 self.outcome
353 .lock()
353 .lock()
354 .unwrap()
354 .unwrap()
355 .traversed
355 .traversed
356 .push(hg_path.detach_from_tree())
356 .push(hg_path.detach_from_tree())
357 }
357 }
358 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
358 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
359 let is_at_repo_root = false;
359 let is_at_repo_root = false;
360 let children_all_have_dirstate_node_or_are_ignored = self
360 let children_all_have_dirstate_node_or_are_ignored = self
361 .traverse_fs_directory_and_dirstate(
361 .traverse_fs_directory_and_dirstate(
362 is_ignored,
362 is_ignored,
363 dirstate_node.children(self.dmap.on_disk)?,
363 dirstate_node.children(self.dmap.on_disk)?,
364 hg_path,
364 hg_path,
365 fs_path,
365 fs_path,
366 Some(fs_metadata),
366 Some(fs_metadata),
367 dirstate_node.cached_directory_mtime()?,
367 dirstate_node.cached_directory_mtime()?,
368 is_at_repo_root,
368 is_at_repo_root,
369 )?;
369 )?;
370 self.maybe_save_directory_mtime(
370 self.maybe_save_directory_mtime(
371 children_all_have_dirstate_node_or_are_ignored,
371 children_all_have_dirstate_node_or_are_ignored,
372 fs_metadata,
372 fs_metadata,
373 dirstate_node,
373 dirstate_node,
374 )?
374 )?
375 } else {
375 } else {
376 if file_or_symlink && self.matcher.matches(hg_path) {
376 if file_or_symlink && self.matcher.matches(hg_path) {
377 if let Some(state) = dirstate_node.state()? {
377 if let Some(state) = dirstate_node.state()? {
378 match state {
378 match state {
379 EntryState::Added => self
379 EntryState::Added => self
380 .outcome
380 .outcome
381 .lock()
381 .lock()
382 .unwrap()
382 .unwrap()
383 .added
383 .added
384 .push(hg_path.detach_from_tree()),
384 .push(hg_path.detach_from_tree()),
385 EntryState::Removed => self
385 EntryState::Removed => self
386 .outcome
386 .outcome
387 .lock()
387 .lock()
388 .unwrap()
388 .unwrap()
389 .removed
389 .removed
390 .push(hg_path.detach_from_tree()),
390 .push(hg_path.detach_from_tree()),
391 EntryState::Merged => self
391 EntryState::Merged => self
392 .outcome
392 .outcome
393 .lock()
393 .lock()
394 .unwrap()
394 .unwrap()
395 .modified
395 .modified
396 .push(hg_path.detach_from_tree()),
396 .push(hg_path.detach_from_tree()),
397 EntryState::Normal => self
397 EntryState::Normal => self
398 .handle_normal_file(&dirstate_node, fs_metadata)?,
398 .handle_normal_file(&dirstate_node, fs_metadata)?,
399 }
399 }
400 } else {
400 } else {
401 // `node.entry.is_none()` indicates a "directory"
401 // `node.entry.is_none()` indicates a "directory"
402 // node, but the filesystem has a file
402 // node, but the filesystem has a file
403 self.mark_unknown_or_ignored(
403 self.mark_unknown_or_ignored(
404 has_ignored_ancestor,
404 has_ignored_ancestor,
405 hg_path,
405 hg_path,
406 );
406 );
407 }
407 }
408 }
408 }
409
409
410 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
410 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
411 {
411 {
412 self.traverse_dirstate_only(child_node)?
412 self.traverse_dirstate_only(child_node)?
413 }
413 }
414 }
414 }
415 Ok(())
415 Ok(())
416 }
416 }
417
417
418 fn maybe_save_directory_mtime(
418 fn maybe_save_directory_mtime(
419 &self,
419 &self,
420 children_all_have_dirstate_node_or_are_ignored: bool,
420 children_all_have_dirstate_node_or_are_ignored: bool,
421 directory_metadata: &std::fs::Metadata,
421 directory_metadata: &std::fs::Metadata,
422 dirstate_node: NodeRef<'tree, 'on_disk>,
422 dirstate_node: NodeRef<'tree, 'on_disk>,
423 ) -> Result<(), DirstateV2ParseError> {
423 ) -> Result<(), DirstateV2ParseError> {
424 if children_all_have_dirstate_node_or_are_ignored {
424 if children_all_have_dirstate_node_or_are_ignored {
425 // All filesystem directory entries from `read_dir` have a
425 // All filesystem directory entries from `read_dir` have a
426 // corresponding node in the dirstate, so we can reconstitute the
426 // corresponding node in the dirstate, so we can reconstitute the
427 // names of those entries without calling `read_dir` again.
427 // names of those entries without calling `read_dir` again.
428 if let (Some(status_start), Ok(directory_mtime)) = (
428 if let (Some(status_start), Ok(directory_mtime)) = (
429 &self.filesystem_time_at_status_start,
429 &self.filesystem_time_at_status_start,
430 directory_metadata.modified(),
430 directory_metadata.modified(),
431 ) {
431 ) {
432 // Although the Rust standard library’s `SystemTime` type
432 // Although the Rust standard library’s `SystemTime` type
433 // has nanosecond precision, the times reported for a
433 // has nanosecond precision, the times reported for a
434 // directory’s (or file’s) modified time may have lower
434 // directory’s (or file’s) modified time may have lower
435 // resolution based on the filesystem (for example ext3
435 // resolution based on the filesystem (for example ext3
436 // only stores integer seconds), kernel (see
436 // only stores integer seconds), kernel (see
437 // https://stackoverflow.com/a/14393315/1162888), etc.
437 // https://stackoverflow.com/a/14393315/1162888), etc.
438 if &directory_mtime >= status_start {
438 if &directory_mtime >= status_start {
439 // The directory was modified too recently, don’t cache its
439 // The directory was modified too recently, don’t cache its
440 // `read_dir` results.
440 // `read_dir` results.
441 //
441 //
442 // A timeline like this is possible:
442 // A timeline like this is possible:
443 //
443 //
444 // 1. A change to this directory (direct child was
444 // 1. A change to this directory (direct child was
445 // added or removed) cause its mtime to be set
445 // added or removed) cause its mtime to be set
446 // (possibly truncated) to `directory_mtime`
446 // (possibly truncated) to `directory_mtime`
447 // 2. This `status` algorithm calls `read_dir`
447 // 2. This `status` algorithm calls `read_dir`
448 // 3. An other change is made to the same directory is
448 // 3. An other change is made to the same directory is
449 // made so that calling `read_dir` agin would give
449 // made so that calling `read_dir` agin would give
450 // different results, but soon enough after 1. that
450 // different results, but soon enough after 1. that
451 // the mtime stays the same
451 // the mtime stays the same
452 //
452 //
453 // On a system where the time resolution poor, this
453 // On a system where the time resolution poor, this
454 // scenario is not unlikely if all three steps are caused
454 // scenario is not unlikely if all three steps are caused
455 // by the same script.
455 // by the same script.
456 } else {
456 } else {
457 // We’ve observed (through `status_start`) that time has
457 // We’ve observed (through `status_start`) that time has
458 // “progressed” since `directory_mtime`, so any further
458 // “progressed” since `directory_mtime`, so any further
459 // change to this directory is extremely likely to cause a
459 // change to this directory is extremely likely to cause a
460 // different mtime.
460 // different mtime.
461 //
461 //
462 // Having the same mtime again is not entirely impossible
462 // Having the same mtime again is not entirely impossible
463 // since the system clock is not monotonous. It could jump
463 // since the system clock is not monotonous. It could jump
464 // backward to some point before `directory_mtime`, then a
464 // backward to some point before `directory_mtime`, then a
465 // directory change could potentially happen during exactly
465 // directory change could potentially happen during exactly
466 // the wrong tick.
466 // the wrong tick.
467 //
467 //
468 // We deem this scenario (unlike the previous one) to be
468 // We deem this scenario (unlike the previous one) to be
469 // unlikely enough in practice.
469 // unlikely enough in practice.
470 let truncated = TruncatedTimestamp::from(directory_mtime);
470 let truncated = TruncatedTimestamp::from(directory_mtime);
471 let is_up_to_date = if let Some(cached) =
471 let is_up_to_date = if let Some(cached) =
472 dirstate_node.cached_directory_mtime()?
472 dirstate_node.cached_directory_mtime()?
473 {
473 {
474 cached.likely_equal(truncated)
474 cached.likely_equal(truncated)
475 } else {
475 } else {
476 false
476 false
477 };
477 };
478 if !is_up_to_date {
478 if !is_up_to_date {
479 let hg_path = dirstate_node
479 let hg_path = dirstate_node
480 .full_path_borrowed(self.dmap.on_disk)?
480 .full_path_borrowed(self.dmap.on_disk)?
481 .detach_from_tree();
481 .detach_from_tree();
482 self.new_cachable_directories
482 self.new_cachable_directories
483 .lock()
483 .lock()
484 .unwrap()
484 .unwrap()
485 .push((hg_path, truncated))
485 .push((hg_path, truncated))
486 }
486 }
487 }
487 }
488 }
488 }
489 }
489 }
490 Ok(())
490 Ok(())
491 }
491 }
492
492
493 /// A file with `EntryState::Normal` in the dirstate was found in the
493 /// A file with `EntryState::Normal` in the dirstate was found in the
494 /// filesystem
494 /// filesystem
495 fn handle_normal_file(
495 fn handle_normal_file(
496 &self,
496 &self,
497 dirstate_node: &NodeRef<'tree, 'on_disk>,
497 dirstate_node: &NodeRef<'tree, 'on_disk>,
498 fs_metadata: &std::fs::Metadata,
498 fs_metadata: &std::fs::Metadata,
499 ) -> Result<(), DirstateV2ParseError> {
499 ) -> Result<(), DirstateV2ParseError> {
500 // Keep the low 31 bits
500 // Keep the low 31 bits
501 fn truncate_u64(value: u64) -> i32 {
501 fn truncate_u64(value: u64) -> i32 {
502 (value & 0x7FFF_FFFF) as i32
502 (value & 0x7FFF_FFFF) as i32
503 }
503 }
504
504
505 let entry = dirstate_node
505 let entry = dirstate_node
506 .entry()?
506 .entry()?
507 .expect("handle_normal_file called with entry-less node");
507 .expect("handle_normal_file called with entry-less node");
508 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
508 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
509 let mode_changed =
509 let mode_changed =
510 || self.options.check_exec && entry.mode_changed(fs_metadata);
510 || self.options.check_exec && entry.mode_changed(fs_metadata);
511 let size = entry.size();
511 let size = entry.size();
512 let size_changed = size != truncate_u64(fs_metadata.len());
512 let size_changed = size != truncate_u64(fs_metadata.len());
513 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
513 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
514 // issue6456: Size returned may be longer due to encryption
514 // issue6456: Size returned may be longer due to encryption
515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
516 self.outcome
516 self.outcome
517 .lock()
517 .lock()
518 .unwrap()
518 .unwrap()
519 .unsure
519 .unsure
520 .push(hg_path.detach_from_tree())
520 .push(hg_path.detach_from_tree())
521 } else if dirstate_node.has_copy_source()
521 } else if dirstate_node.has_copy_source()
522 || entry.is_from_other_parent()
522 || entry.is_from_other_parent()
523 || (size >= 0 && (size_changed || mode_changed()))
523 || (size >= 0 && (size_changed || mode_changed()))
524 {
524 {
525 self.outcome
525 self.outcome
526 .lock()
526 .lock()
527 .unwrap()
527 .unwrap()
528 .modified
528 .modified
529 .push(hg_path.detach_from_tree())
529 .push(hg_path.detach_from_tree())
530 } else {
530 } else {
531 let mtime_looks_clean;
531 let mtime_looks_clean;
532 if let Some(dirstate_mtime) = entry.truncated_mtime() {
532 if let Some(dirstate_mtime) = entry.truncated_mtime() {
533 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
533 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
534 .expect("OS/libc does not support mtime?")
534 .expect("OS/libc does not support mtime?");
535 // For now don’t use sub-second precision for file mtimes
536 .to_integer_second();
537 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
535 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
538 && !fs_mtime.likely_equal(self.options.last_normal_time)
536 && !fs_mtime.likely_equal(self.options.last_normal_time)
539 } else {
537 } else {
540 // No mtime in the dirstate entry
538 // No mtime in the dirstate entry
541 mtime_looks_clean = false
539 mtime_looks_clean = false
542 };
540 };
543 if !mtime_looks_clean {
541 if !mtime_looks_clean {
544 self.outcome
542 self.outcome
545 .lock()
543 .lock()
546 .unwrap()
544 .unwrap()
547 .unsure
545 .unsure
548 .push(hg_path.detach_from_tree())
546 .push(hg_path.detach_from_tree())
549 } else if self.options.list_clean {
547 } else if self.options.list_clean {
550 self.outcome
548 self.outcome
551 .lock()
549 .lock()
552 .unwrap()
550 .unwrap()
553 .clean
551 .clean
554 .push(hg_path.detach_from_tree())
552 .push(hg_path.detach_from_tree())
555 }
553 }
556 }
554 }
557 Ok(())
555 Ok(())
558 }
556 }
559
557
560 /// A node in the dirstate tree has no corresponding filesystem entry
558 /// A node in the dirstate tree has no corresponding filesystem entry
561 fn traverse_dirstate_only(
559 fn traverse_dirstate_only(
562 &self,
560 &self,
563 dirstate_node: NodeRef<'tree, 'on_disk>,
561 dirstate_node: NodeRef<'tree, 'on_disk>,
564 ) -> Result<(), DirstateV2ParseError> {
562 ) -> Result<(), DirstateV2ParseError> {
565 self.check_for_outdated_directory_cache(&dirstate_node)?;
563 self.check_for_outdated_directory_cache(&dirstate_node)?;
566 self.mark_removed_or_deleted_if_file(
564 self.mark_removed_or_deleted_if_file(
567 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
565 &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
568 dirstate_node.state()?,
566 dirstate_node.state()?,
569 );
567 );
570 dirstate_node
568 dirstate_node
571 .children(self.dmap.on_disk)?
569 .children(self.dmap.on_disk)?
572 .par_iter()
570 .par_iter()
573 .map(|child_node| self.traverse_dirstate_only(child_node))
571 .map(|child_node| self.traverse_dirstate_only(child_node))
574 .collect()
572 .collect()
575 }
573 }
576
574
577 /// A node in the dirstate tree has no corresponding *file* on the
575 /// A node in the dirstate tree has no corresponding *file* on the
578 /// filesystem
576 /// filesystem
579 ///
577 ///
580 /// Does nothing on a "directory" node
578 /// Does nothing on a "directory" node
581 fn mark_removed_or_deleted_if_file(
579 fn mark_removed_or_deleted_if_file(
582 &self,
580 &self,
583 hg_path: &BorrowedPath<'tree, 'on_disk>,
581 hg_path: &BorrowedPath<'tree, 'on_disk>,
584 dirstate_node_state: Option<EntryState>,
582 dirstate_node_state: Option<EntryState>,
585 ) {
583 ) {
586 if let Some(state) = dirstate_node_state {
584 if let Some(state) = dirstate_node_state {
587 if self.matcher.matches(hg_path) {
585 if self.matcher.matches(hg_path) {
588 if let EntryState::Removed = state {
586 if let EntryState::Removed = state {
589 self.outcome
587 self.outcome
590 .lock()
588 .lock()
591 .unwrap()
589 .unwrap()
592 .removed
590 .removed
593 .push(hg_path.detach_from_tree())
591 .push(hg_path.detach_from_tree())
594 } else {
592 } else {
595 self.outcome
593 self.outcome
596 .lock()
594 .lock()
597 .unwrap()
595 .unwrap()
598 .deleted
596 .deleted
599 .push(hg_path.detach_from_tree())
597 .push(hg_path.detach_from_tree())
600 }
598 }
601 }
599 }
602 }
600 }
603 }
601 }
604
602
605 /// Something in the filesystem has no corresponding dirstate node
603 /// Something in the filesystem has no corresponding dirstate node
606 ///
604 ///
607 /// Returns whether that path is ignored
605 /// Returns whether that path is ignored
608 fn traverse_fs_only(
606 fn traverse_fs_only(
609 &self,
607 &self,
610 has_ignored_ancestor: bool,
608 has_ignored_ancestor: bool,
611 directory_hg_path: &HgPath,
609 directory_hg_path: &HgPath,
612 fs_entry: &DirEntry,
610 fs_entry: &DirEntry,
613 ) -> bool {
611 ) -> bool {
614 let hg_path = directory_hg_path.join(&fs_entry.base_name);
612 let hg_path = directory_hg_path.join(&fs_entry.base_name);
615 let file_type = fs_entry.metadata.file_type();
613 let file_type = fs_entry.metadata.file_type();
616 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
614 let file_or_symlink = file_type.is_file() || file_type.is_symlink();
617 if file_type.is_dir() {
615 if file_type.is_dir() {
618 let is_ignored =
616 let is_ignored =
619 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
617 has_ignored_ancestor || (self.ignore_fn)(&hg_path);
620 let traverse_children = if is_ignored {
618 let traverse_children = if is_ignored {
621 // Descendants of an ignored directory are all ignored
619 // Descendants of an ignored directory are all ignored
622 self.options.list_ignored
620 self.options.list_ignored
623 } else {
621 } else {
624 // Descendants of an unknown directory may be either unknown or
622 // Descendants of an unknown directory may be either unknown or
625 // ignored
623 // ignored
626 self.options.list_unknown || self.options.list_ignored
624 self.options.list_unknown || self.options.list_ignored
627 };
625 };
628 if traverse_children {
626 if traverse_children {
629 let is_at_repo_root = false;
627 let is_at_repo_root = false;
630 if let Ok(children_fs_entries) = self.read_dir(
628 if let Ok(children_fs_entries) = self.read_dir(
631 &hg_path,
629 &hg_path,
632 &fs_entry.full_path,
630 &fs_entry.full_path,
633 is_at_repo_root,
631 is_at_repo_root,
634 ) {
632 ) {
635 children_fs_entries.par_iter().for_each(|child_fs_entry| {
633 children_fs_entries.par_iter().for_each(|child_fs_entry| {
636 self.traverse_fs_only(
634 self.traverse_fs_only(
637 is_ignored,
635 is_ignored,
638 &hg_path,
636 &hg_path,
639 child_fs_entry,
637 child_fs_entry,
640 );
638 );
641 })
639 })
642 }
640 }
643 }
641 }
644 if self.options.collect_traversed_dirs {
642 if self.options.collect_traversed_dirs {
645 self.outcome.lock().unwrap().traversed.push(hg_path.into())
643 self.outcome.lock().unwrap().traversed.push(hg_path.into())
646 }
644 }
647 is_ignored
645 is_ignored
648 } else {
646 } else {
649 if file_or_symlink {
647 if file_or_symlink {
650 if self.matcher.matches(&hg_path) {
648 if self.matcher.matches(&hg_path) {
651 self.mark_unknown_or_ignored(
649 self.mark_unknown_or_ignored(
652 has_ignored_ancestor,
650 has_ignored_ancestor,
653 &BorrowedPath::InMemory(&hg_path),
651 &BorrowedPath::InMemory(&hg_path),
654 )
652 )
655 } else {
653 } else {
656 // We haven’t computed whether this path is ignored. It
654 // We haven’t computed whether this path is ignored. It
657 // might not be, and a future run of status might have a
655 // might not be, and a future run of status might have a
658 // different matcher that matches it. So treat it as not
656 // different matcher that matches it. So treat it as not
659 // ignored. That is, inhibit readdir caching of the parent
657 // ignored. That is, inhibit readdir caching of the parent
660 // directory.
658 // directory.
661 false
659 false
662 }
660 }
663 } else {
661 } else {
664 // This is neither a directory, a plain file, or a symlink.
662 // This is neither a directory, a plain file, or a symlink.
665 // Treat it like an ignored file.
663 // Treat it like an ignored file.
666 true
664 true
667 }
665 }
668 }
666 }
669 }
667 }
670
668
671 /// Returns whether that path is ignored
669 /// Returns whether that path is ignored
672 fn mark_unknown_or_ignored(
670 fn mark_unknown_or_ignored(
673 &self,
671 &self,
674 has_ignored_ancestor: bool,
672 has_ignored_ancestor: bool,
675 hg_path: &BorrowedPath<'_, 'on_disk>,
673 hg_path: &BorrowedPath<'_, 'on_disk>,
676 ) -> bool {
674 ) -> bool {
677 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
675 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
678 if is_ignored {
676 if is_ignored {
679 if self.options.list_ignored {
677 if self.options.list_ignored {
680 self.outcome
678 self.outcome
681 .lock()
679 .lock()
682 .unwrap()
680 .unwrap()
683 .ignored
681 .ignored
684 .push(hg_path.detach_from_tree())
682 .push(hg_path.detach_from_tree())
685 }
683 }
686 } else {
684 } else {
687 if self.options.list_unknown {
685 if self.options.list_unknown {
688 self.outcome
686 self.outcome
689 .lock()
687 .lock()
690 .unwrap()
688 .unwrap()
691 .unknown
689 .unknown
692 .push(hg_path.detach_from_tree())
690 .push(hg_path.detach_from_tree())
693 }
691 }
694 }
692 }
695 is_ignored
693 is_ignored
696 }
694 }
697 }
695 }
698
696
699 struct DirEntry {
697 struct DirEntry {
700 base_name: HgPathBuf,
698 base_name: HgPathBuf,
701 full_path: PathBuf,
699 full_path: PathBuf,
702 metadata: std::fs::Metadata,
700 metadata: std::fs::Metadata,
703 }
701 }
704
702
705 impl DirEntry {
703 impl DirEntry {
706 /// Returns **unsorted** entries in the given directory, with name and
704 /// Returns **unsorted** entries in the given directory, with name and
707 /// metadata.
705 /// metadata.
708 ///
706 ///
709 /// If a `.hg` sub-directory is encountered:
707 /// If a `.hg` sub-directory is encountered:
710 ///
708 ///
711 /// * At the repository root, ignore that sub-directory
709 /// * At the repository root, ignore that sub-directory
712 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
710 /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
713 /// list instead.
711 /// list instead.
714 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
712 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
715 let mut results = Vec::new();
713 let mut results = Vec::new();
716 for entry in path.read_dir()? {
714 for entry in path.read_dir()? {
717 let entry = entry?;
715 let entry = entry?;
718 let metadata = entry.metadata()?;
716 let metadata = entry.metadata()?;
719 let name = get_bytes_from_os_string(entry.file_name());
717 let name = get_bytes_from_os_string(entry.file_name());
720 // FIXME don't do this when cached
718 // FIXME don't do this when cached
721 if name == b".hg" {
719 if name == b".hg" {
722 if is_at_repo_root {
720 if is_at_repo_root {
723 // Skip the repo’s own .hg (might be a symlink)
721 // Skip the repo’s own .hg (might be a symlink)
724 continue;
722 continue;
725 } else if metadata.is_dir() {
723 } else if metadata.is_dir() {
726 // A .hg sub-directory at another location means a subrepo,
724 // A .hg sub-directory at another location means a subrepo,
727 // skip it entirely.
725 // skip it entirely.
728 return Ok(Vec::new());
726 return Ok(Vec::new());
729 }
727 }
730 }
728 }
731 results.push(DirEntry {
729 results.push(DirEntry {
732 base_name: name.into(),
730 base_name: name.into(),
733 full_path: entry.path(),
731 full_path: entry.path(),
734 metadata,
732 metadata,
735 })
733 })
736 }
734 }
737 Ok(results)
735 Ok(results)
738 }
736 }
739 }
737 }
740
738
741 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
739 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
742 /// of the give repository.
740 /// of the give repository.
743 ///
741 ///
744 /// This is similar to `SystemTime::now()`, with the result truncated to the
742 /// This is similar to `SystemTime::now()`, with the result truncated to the
745 /// same time resolution as other files’ modification times. Using `.hg`
743 /// same time resolution as other files’ modification times. Using `.hg`
746 /// instead of the system’s default temporary directory (such as `/tmp`) makes
744 /// instead of the system’s default temporary directory (such as `/tmp`) makes
747 /// it more likely the temporary file is in the same disk partition as contents
745 /// it more likely the temporary file is in the same disk partition as contents
748 /// of the working directory, which can matter since different filesystems may
746 /// of the working directory, which can matter since different filesystems may
749 /// store timestamps with different resolutions.
747 /// store timestamps with different resolutions.
750 ///
748 ///
751 /// This may fail, typically if we lack write permissions. In that case we
749 /// This may fail, typically if we lack write permissions. In that case we
752 /// should continue the `status()` algoritm anyway and consider the current
750 /// should continue the `status()` algoritm anyway and consider the current
753 /// date/time to be unknown.
751 /// date/time to be unknown.
754 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
752 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
755 tempfile::tempfile_in(repo_root.join(".hg"))?
753 tempfile::tempfile_in(repo_root.join(".hg"))?
756 .metadata()?
754 .metadata()?
757 .modified()
755 .modified()
758 }
756 }
General Comments 0
You need to be logged in to leave comments. Login now