##// END OF EJS Templates
dirstate-v2: actually use sub-second mtime precision...
Simon Sapin -
r49082:83d0bd45 default
parent child Browse files
Show More
@@ -1,77 +1,87
1 1 # Copyright Mercurial Contributors
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 from __future__ import absolute_import
7 7
8 8 import functools
9 9 import stat
10 10
11 11
rangemask = 0x7FFFFFFF


@functools.total_ordering
class timestamp(tuple):
    """
    A Unix timestamp with optional nanoseconds precision,
    modulo 2**31 seconds.

    A 2-tuple containing:

    `truncated_seconds`: seconds since the Unix epoch,
    truncated to its lower 31 bits

    `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`.
    When this is zero, the sub-second precision is considered unknown.
    """

    def __new__(cls, value):
        seconds, nanos = value
        # Keep only the lower 31 bits of the seconds component.
        return super(timestamp, cls).__new__(cls, (seconds & rangemask, nanos))

    def __eq__(self, other):
        # A zero sub-second component means "precision unknown", so two
        # timestamps with equal seconds compare equal whenever either side
        # lacks sub-second information.
        secs, nanos = self
        other_secs, other_nanos = other
        if secs != other_secs:
            return False
        return nanos == other_nanos or 0 in (nanos, other_nanos)

    def __gt__(self, other):
        secs, nanos = self
        other_secs, other_nanos = other
        if secs != other_secs:
            return secs > other_secs
        if 0 in (nanos, other_nanos):
            # Considered equal when either precision is unknown,
            # so not "greater than".
            return False
        return nanos > other_nanos
55 55
56 56
def zero():
    """
    Returns the `timestamp` at the Unix epoch.
    """
    # 0 & rangemask is still 0, so going through the constructor is
    # equivalent to building the tuple directly.
    return timestamp((0, 0))
62 62
63 63
def mtime_of(stat_result):
    """
    Takes an `os.stat_result`-like object and returns a `timestamp` object
    for its modification time.
    """
    # TODO: add this attribute to `osutil.stat` objects,
    # see `mercurial/cext/osutil.c`.
    #
    # This attribute is also not available on Python 2.
    nanos = getattr(stat_result, 'st_mtime_ns', None)
    if nanos is None:
        # https://docs.python.org/2/library/os.html#os.stat_float_times
        # "For compatibility with older Python versions,
        # accessing stat_result as a tuple always returns integers."
        secs = stat_result[stat.ST_MTIME]
        subsec_nanos = 0
    else:
        # Split nanoseconds-since-epoch into whole seconds and the
        # sub-second remainder (floor semantics match `//` and `%`).
        secs, subsec_nanos = divmod(nanos, 1000000000)
    return timestamp((secs, subsec_nanos))
@@ -1,414 +1,411
1 1 # v2.py - Pure-Python implementation of the dirstate-v2 file format
2 2 #
3 3 # Copyright Mercurial Contributors
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from ..thirdparty import attr
13 13 from .. import error, policy
14 14
15 15 parsers = policy.importmod('parsers')
16 16
17 17
# Must match the constant of the same name in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`
TREE_METADATA_SIZE = 44
NODE_SIZE = 44


# Must match the `TreeMetadata` Rust struct in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
#
# * 4 bytes: start offset of root nodes
# * 4 bytes: number of root nodes
# * 4 bytes: total number of nodes in the tree that have an entry
# * 4 bytes: total number of nodes in the tree that have a copy source
# * 4 bytes: number of bytes in the data file that are not used anymore
# * 4 bytes: unused
# * 20 bytes: SHA-1 hash of ignore patterns
TREE_METADATA = struct.Struct('>LLLLL4s20s')


# Must match the `Node` Rust struct in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
#
# * 4 bytes: start offset of full path
# * 2 bytes: length of the full path
# * 2 bytes: length within the full path before its "base name"
# * 4 bytes: start offset of the copy source if any, or zero for no copy source
# * 2 bytes: length of the copy source if any, or unused
# * 4 bytes: start offset of child nodes
# * 4 bytes: number of child nodes
# * 4 bytes: number of descendant nodes that have an entry
# * 4 bytes: number of descendant nodes that have a "tracked" state
# * 2 bytes: flags (the 'H' field below; the field sizes must sum to NODE_SIZE)
# * 4 bytes: expected size
# * 4 bytes: mtime seconds
# * 4 bytes: mtime nanoseconds (signed 'l', like the two fields above)
NODE = struct.Struct('>LHHLHLLLLHlll')


# Keep the advertised sizes in sync with the actual struct formats
# (and therefore with the Rust implementation).
assert TREE_METADATA_SIZE == TREE_METADATA.size
assert NODE_SIZE == NODE.size
58 58
59 59
def parse_dirstate(map, copy_map, data, tree_metadata):
    """parse a full v2-dirstate from a binary data into dictionnaries:

    - map: a {path: entry} mapping that will be filled
    - copy_map: a {path: copy-source} mapping that will be filled
    - data: a binary blob contains v2 nodes data
    - tree_metadata:: a binary blob of the top level node (from the docket)
    """
    fields = TREE_METADATA.unpack(tree_metadata)
    # Only the root nodes' location is needed here; the remaining fields
    # (node counts, unreachable bytes, unused padding, ignore-patterns
    # hash) are docket metadata for other consumers.
    root_nodes_start = fields[0]
    root_nodes_len = fields[1]
    parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
78 78
79 79
def parse_nodes(map, copy_map, data, start, len):
    """parse <len> nodes from <data> starting at offset <start>

    This is used by parse_dirstate to recursively fill `map` and `copy_map`.

    All directory specific information is ignored and do not need any
    processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
    """
    for index in range(len):
        node_offset = start + NODE_SIZE * index
        fields = NODE.unpack(slice_with_len(data, node_offset, NODE_SIZE))
        path_start = fields[0]
        path_len = fields[1]
        # fields[2] is the "base name" offset, unused here
        copy_source_start = fields[3]
        copy_source_len = fields[4]
        children_start = fields[5]
        children_count = fields[6]
        # fields[7] and fields[8] are descendant counters, unused here
        flags = fields[9]
        size = fields[10]
        mtime_s = fields[11]
        mtime_ns = fields[12]

        # Parse child nodes of this node recursively
        parse_nodes(map, copy_map, data, children_start, children_count)

        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
        if not item.any_tracked:
            continue
        path = slice_with_len(data, path_start, path_len)
        map[path] = item
        if copy_source_start:
            copy_map[path] = slice_with_len(
                data, copy_source_start, copy_source_len
            )
122 119
123 120
def slice_with_len(data, start, len):
    """Return the sub-sequence of `data` starting at `start`, `len` items long."""
    end = start + len
    return data[start:end]
126 123
127 124
@attr.s
class Node(object):
    # In-memory representation of an on-disk dirstate-v2 `Node`, built
    # lazily while packing (see `pack_dirstate`). Field order mirrors the
    # information needed by `NODE.pack` below.
    path = attr.ib()  # full path of this node, as bytes
    entry = attr.ib()  # the entry, or None for directory-only nodes
    parent = attr.ib(default=None)  # parent Node; None for the root
    children_count = attr.ib(default=0)
    children_offset = attr.ib(default=0)  # offset of packed children in `data`
    descendants_with_entry = attr.ib(default=0)
    tracked_descendants = attr.ib(default=0)

    def pack(self, copy_map, paths_offset):
        """Return the fixed-size binary representation of this node.

        `paths_offset` is the offset in the data file where the caller will
        write this node's path, immediately followed by its copy source
        (if any) — the offsets packed here point into that layout.
        """
        path = self.path
        copy = copy_map.get(path)
        entry = self.entry

        path_start = paths_offset
        path_len = len(path)
        basename_start = path.rfind(b'/') + 1  # 0 if rfind returns -1
        if copy is not None:
            copy_source_start = paths_offset + len(path)
            copy_source_len = len(copy)
        else:
            copy_source_start = 0
            copy_source_len = 0
        if entry is not None:
            flags, size, mtime_s, mtime_ns = entry.v2_data()
        else:
            # There are no mtime-cached directories in the Python implementation
            flags = 0
            size = 0
            mtime_s = 0
            mtime_ns = 0
        return NODE.pack(
            path_start,
            path_len,
            basename_start,
            copy_source_start,
            copy_source_len,
            self.children_offset,
            self.children_count,
            self.descendants_with_entry,
            self.tracked_descendants,
            flags,
            size,
            mtime_s,
            mtime_ns,
        )
175 172
176 173
def pack_dirstate(map, copy_map, now):
    """
    Pack `map` and `copy_map` into the dirstate v2 binary format and return
    the bytearray.
    `now` is a timestamp of the current filesystem time used to detect race
    conditions in writing the dirstate to disk, see inline comment.

    The on-disk format expects a tree-like structure where the leaves are
    written first (and sorted per-directory), going up levels until the root
    node and writing that one to the docket. See more details on the on-disk
    format in `mercurial/helptext/internals/dirstate-v2`.

    Since both `map` and `copy_map` are flat dicts we need to figure out the
    hierarchy. This algorithm does so without having to build the entire tree
    in-memory: it only keeps the minimum number of nodes around to satisfy the
    format.

    # Algorithm explanation

    This explanation does not talk about the different counters for tracked
    descendents and storing the copies, but that work is pretty simple once this
    algorithm is in place.

    ## Building a subtree

    First, sort `map`: this makes it so the leaves of the tree are contiguous
    per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
    and enables us to use the ordering of folders to have a "cursor" of the
    current folder we're in without ever going twice in the same branch of the
    tree. The cursor is a node that remembers its parent and any information
    relevant to the format (see the `Node` class), building the relevant part
    of the tree lazily.
    Then, for each file in `map`, move the cursor into the tree to the
    corresponding folder of the file: for example, if the very first file
    is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
    its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
    `Node["a"]`. These nodes are kept around in a stack.
    If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
    add it to the stack and keep looping with the same logic of creating the
    tree nodes as needed. If however the next file in `map` is *not* in the same
    subtree ("a/other", if we're still in the "a/b" folder), then we know that
    the subtree we're in is complete.

    ## Writing the subtree

    We have the entire subtree in the stack, so we start writing it to disk
    folder by folder. The way we write a folder is to pop the stack into a list
    until the folder changes, revert this list of direct children (to satisfy
    the format requirement that children be sorted). This process repeats until
    we hit the "other" subtree.

    An example:
        a
        dir1/b
        dir1/c
        dir2/dir3/d
        dir2/dir3/e
        dir2/f

    Would have us:
        - add to the stack until "dir2/dir3/e"
        - realize that "dir2/f" is in a different subtree
        - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
          pack them since the next entry is "dir2/dir3"
        - go back up to "dir2"
        - add "dir2/f" to the stack
        - realize we're done with the map
        - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
        - go up to the root node, do the same to write "a", "dir1" and "dir2" in
          that order

    ## Special case for the root node

    The root node is not serialized in the format, but its information is
    written to the docket. Again, see more details on the on-disk format in
    `mercurial/helptext/internals/dirstate-v2`.
    """
    data = bytearray()
    root_nodes_start = 0
    root_nodes_len = 0
    nodes_with_entry_count = 0
    nodes_with_copy_source_count = 0
    # Will always be 0 since this implementation always re-writes everything
    # to disk
    unreachable_bytes = 0
    unused = b'\x00' * 4
    # This is an optimization that's only useful for the Rust implementation
    ignore_patterns_hash = b'\x00' * 20

    if len(map) == 0:
        # Empty dirstate: no node data, only a docket describing zero roots.
        tree_metadata = TREE_METADATA.pack(
            root_nodes_start,
            root_nodes_len,
            nodes_with_entry_count,
            nodes_with_copy_source_count,
            unreachable_bytes,
            unused,
            ignore_patterns_hash,
        )
        return data, tree_metadata

    sorted_map = sorted(map.items(), key=lambda x: x[0])

    # Use a stack to not have to only remember the nodes we currently need
    # instead of building the entire tree in memory
    stack = []
    current_node = Node(b"", None)
    stack.append(current_node)

    # `index` starts at 1, so `sorted_map[index]` is the *next* entry.
    for index, (path, entry) in enumerate(sorted_map, 1):
        if entry.need_delay(now):
            # The file was last modified "simultaneously" with the current
            # write to dirstate (i.e. within the same second for file-
            # systems with a granularity of 1 sec). This commonly happens
            # for at least a couple of files on 'update'.
            # The user could change the file without changing its size
            # within the same second. Invalidate the file's mtime in
            # dirstate, forcing future 'status' calls to compare the
            # contents of the file if the size is the same. This prevents
            # mistakenly treating such files as clean.
            entry.set_possibly_dirty()
        nodes_with_entry_count += 1
        if path in copy_map:
            nodes_with_copy_source_count += 1
        current_folder = get_folder(path)
        current_node = move_to_correct_node_in_tree(
            current_folder, current_node, stack
        )

        current_node.children_count += 1
        # Entries from `map` are never `None`
        if entry.tracked:
            current_node.tracked_descendants += 1
        current_node.descendants_with_entry += 1
        stack.append(Node(path, entry, current_node))

        should_pack = True
        next_path = None
        if index < len(sorted_map):
            # Determine if the next entry is in the same sub-tree, if so don't
            # pack yet
            next_path = sorted_map[index][0]
            should_pack = not get_folder(next_path).startswith(current_folder)
        if should_pack:
            pack_directory_children(current_node, copy_map, data, stack)
            while stack and current_node.path != b"":
                # Go up the tree and write until we reach the folder of the next
                # entry (if any, otherwise the root)
                parent = current_node.parent
                in_parent_folder_of_next_entry = next_path is not None and (
                    get_folder(next_path).startswith(get_folder(stack[-1].path))
                )
                if parent is None or in_parent_folder_of_next_entry:
                    break
                pack_directory_children(parent, copy_map, data, stack)
                current_node = parent

    # Special case for the root node since we don't write it to disk, only its
    # children to the docket
    current_node = stack.pop()
    assert current_node.path == b"", current_node.path
    assert len(stack) == 0, len(stack)

    tree_metadata = TREE_METADATA.pack(
        current_node.children_offset,
        current_node.children_count,
        nodes_with_entry_count,
        nodes_with_copy_source_count,
        unreachable_bytes,
        unused,
        ignore_patterns_hash,
    )

    return data, tree_metadata
351 348
352 349
def get_folder(path):
    """
    Return the folder of the path that's given, an empty string for root paths.
    """
    index = path.rfind(b'/')
    if index == -1:
        return b''
    return path[:index]
358 355
359 356
def move_to_correct_node_in_tree(target_folder, current_node, stack):
    """
    Move inside the dirstate node tree to the node corresponding to
    `target_folder`, creating the missing nodes along the way if needed.

    Returns the node for `target_folder`; `stack` is mutated in place as
    intermediate folder nodes are created.
    """
    while target_folder != current_node.path:
        if target_folder.startswith(current_node.path):
            # We need to go down a folder
            prefix = target_folder[len(current_node.path) :].lstrip(b'/')
            subfolder_name = prefix.split(b'/', 1)[0]
            if current_node.path:
                subfolder_path = current_node.path + b'/' + subfolder_name
            else:
                # Direct child of the root: no separator to prepend.
                subfolder_path = subfolder_name
            next_node = stack[-1]
            if next_node.path == target_folder:
                # This folder is now a file and only contains removed entries
                # merge with the last node
                current_node = next_node
            else:
                current_node.children_count += 1
                current_node = Node(subfolder_path, None, current_node)
                stack.append(current_node)
        else:
            # We need to go up a folder
            current_node = current_node.parent
    return current_node
387 384
388 385
def pack_directory_children(node, copy_map, data, stack):
    """
    Write the binary representation of the direct sorted children of `node` to
    `data`
    """
    # Pop this node's direct children off the stack; they were pushed in
    # sorted order, so popping yields them reversed.
    children = []
    while stack[-1].path != b"" and get_folder(stack[-1].path) == node.path:
        children.append(stack.pop())
    if not children:
        raise error.ProgrammingError(b"no direct children for %r" % node.path)

    # Restore the on-disk (sorted) order.
    children.reverse()
    packed_nodes = bytearray()
    # Write the paths to `data`. Pack child nodes but don't write them yet
    for child in children:
        packed_nodes.extend(
            child.pack(copy_map=copy_map, paths_offset=len(data))
        )
        data.extend(child.path)
        data.extend(copy_map.get(child.path, b""))
        node.tracked_descendants += child.tracked_descendants
        node.descendants_with_entry += child.descendants_with_entry
    # Write the fixed-size child nodes all together
    node.children_offset = len(data)
    data.extend(packed_nodes)
@@ -1,648 +1,643
1 1 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
2 2 use crate::errors::HgError;
3 3 use bitflags::bitflags;
4 4 use std::convert::{TryFrom, TryInto};
5 5 use std::fs;
6 6 use std::io;
7 7 use std::time::{SystemTime, UNIX_EPOCH};
8 8
/// Dirstate entry state as represented in the v1 on-disk format
/// (see `DirstateEntry::from_v1_data`).
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum EntryState {
    Normal,
    Added,
    Removed,
    Merged,
}
16 16
/// `size` and `mtime.seconds` are truncated to 31 bits.
///
/// TODO: double-check status algorithm correctness for files
/// larger than 2 GiB or modified after 2038.
#[derive(Debug, Copy, Clone)]
pub struct DirstateEntry {
    pub(crate) flags: Flags,
    /// `(mode, size)` of the file, `None` when unknown.
    mode_size: Option<(u32, u32)>,
    /// Cached modification time, `None` when unknown.
    mtime: Option<TruncatedTimestamp>,
}
27 27
bitflags! {
    pub(crate) struct Flags: u8 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        // The `FALLBACK_*` bits are only meaningful when the corresponding
        // `HAS_FALLBACK_*` bit is set (an `Option<bool>` packed into two
        // bits — see `from_v2_data`).
        const HAS_FALLBACK_EXEC = 1 << 3;
        const FALLBACK_EXEC = 1 << 4;
        const HAS_FALLBACK_SYMLINK = 1 << 5;
        const FALLBACK_SYMLINK = 1 << 6;
    }
}
39 39
/// A Unix timestamp with nanoseconds precision
#[derive(Debug, Copy, Clone)]
pub struct TruncatedTimestamp {
    /// Seconds since the Unix epoch, truncated to the lower 31 bits
    /// (see `RANGE_MASK_31BIT`).
    truncated_seconds: u32,
    /// Always in the `0 .. 1_000_000_000` range.
    nanoseconds: u32,
}
47 47
impl TruncatedTimestamp {
    /// Constructs from a timestamp potentially outside of the supported range,
    /// and truncate the seconds components to its lower 31 bits.
    ///
    /// Panics if the nanoseconds components is not in the expected range.
    pub fn new_truncate(seconds: i64, nanoseconds: u32) -> Self {
        assert!(nanoseconds < NSEC_PER_SEC);
        Self {
            truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
            nanoseconds,
        }
    }

    /// Construct from components. Returns an error if they are not in the
    /// expected range.
    pub fn from_already_truncated(
        truncated_seconds: u32,
        nanoseconds: u32,
    ) -> Result<Self, DirstateV2ParseError> {
        if truncated_seconds & !RANGE_MASK_31BIT == 0
            && nanoseconds < NSEC_PER_SEC
        {
            Ok(Self {
                truncated_seconds,
                nanoseconds,
            })
        } else {
            Err(DirstateV2ParseError)
        }
    }

    /// Returns the (truncated) modification time of `metadata`.
    ///
    /// On Unix this reads the underlying `timespec` fields directly;
    /// elsewhere it goes through `SystemTime` (see the `From` impl below).
    pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
        #[cfg(unix)]
        {
            use std::os::unix::fs::MetadataExt;
            let seconds = metadata.mtime();
            // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range
            let nanoseconds = metadata.mtime_nsec().try_into().unwrap();
            Ok(Self::new_truncate(seconds, nanoseconds))
        }
        #[cfg(not(unix))]
        {
            metadata.modified().map(Self::from)
        }
    }

    /// The lower 31 bits of the number of seconds since the epoch.
    pub fn truncated_seconds(&self) -> u32 {
        self.truncated_seconds
    }

    /// The sub-second component of this timestamp, in nanoseconds.
    /// Always in the `0 .. 1_000_000_000` range.
    ///
    /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
    pub fn nanoseconds(&self) -> u32 {
        self.nanoseconds
    }

    /// Returns whether two timestamps are equal modulo 2**31 seconds.
    ///
    /// If this returns `true`, the original values converted from `SystemTime`
    /// or given to `new_truncate` were very likely equal. A false positive is
    /// possible if they were exactly a multiple of 2**31 seconds apart (around
    /// 68 years). This is deemed very unlikely to happen by chance, especially
    /// on filesystems that support sub-second precision.
    ///
    /// If someone is manipulating the modification times of some files to
    /// intentionally make `hg status` return incorrect results, not truncating
    /// wouldn’t help much since they can set exactly the expected timestamp.
    ///
    /// Sub-second precision is ignored if it is zero in either value.
    /// Some APIs simply return zero when more precision is not available.
    /// When comparing values from different sources, if only one is truncated
    /// in that way, doing a simple comparison would cause many false
    /// negatives.
    pub fn likely_equal(self, other: Self) -> bool {
        self.truncated_seconds == other.truncated_seconds
            && (self.nanoseconds == other.nanoseconds
                || self.nanoseconds == 0
                || other.nanoseconds == 0)
    }

    /// Like `likely_equal`, comparing against the mtime of `metadata`.
    pub fn likely_equal_to_mtime_of(
        self,
        metadata: &fs::Metadata,
    ) -> io::Result<bool> {
        Ok(self.likely_equal(Self::for_mtime_of(metadata)?))
    }
}
143 138
impl From<SystemTime> for TruncatedTimestamp {
    fn from(system_time: SystemTime) -> Self {
        // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
        // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
        // We want to effectively access its fields, but the Rust standard
        // library does not expose them. The best we can do is:
        let seconds;
        let nanoseconds;
        match system_time.duration_since(UNIX_EPOCH) {
            Ok(duration) => {
                seconds = duration.as_secs() as i64;
                nanoseconds = duration.subsec_nanos();
            }
            Err(error) => {
                // `system_time` is before `UNIX_EPOCH`.
                // We need to undo this algorithm:
                // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
                let negative = error.duration();
                let negative_secs = negative.as_secs() as i64;
                let negative_nanos = negative.subsec_nanos();
                if negative_nanos == 0 {
                    seconds = -negative_secs;
                    nanoseconds = 0;
                } else {
                    // For example if `system_time` was 4.3 seconds before
                    // the Unix epoch we get a Duration that represents
                    // `(-4, -0.3)` but we want `(-5, +0.7)`:
                    seconds = -1 - negative_secs;
                    nanoseconds = NSEC_PER_SEC - negative_nanos;
                }
            }
        };
        Self::new_truncate(seconds, nanoseconds)
    }
}
179 174
const NSEC_PER_SEC: u32 = 1_000_000_000;
/// Mask keeping the lower 31 bits of a seconds or size value.
const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;

/// Sentinel `mtime` value in the v1 format meaning "no usable cached mtime".
pub const MTIME_UNSET: i32 = -1;

/// A `DirstateEntry` with a size of `-2` means that it was merged from the
/// other parent. This allows revert to pick the right status back during a
/// merge.
pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
/// A special value used for internal representation of special case in
/// dirstate v1 format.
pub const SIZE_NON_NORMAL: i32 = -1;
192 187
193 188 impl DirstateEntry {
194 189 pub fn from_v2_data(
195 190 wdir_tracked: bool,
196 191 p1_tracked: bool,
197 192 p2_info: bool,
198 193 mode_size: Option<(u32, u32)>,
199 194 mtime: Option<TruncatedTimestamp>,
200 195 fallback_exec: Option<bool>,
201 196 fallback_symlink: Option<bool>,
202 197 ) -> Self {
203 198 if let Some((mode, size)) = mode_size {
204 199 // TODO: return an error for out of range values?
205 200 assert!(mode & !RANGE_MASK_31BIT == 0);
206 201 assert!(size & !RANGE_MASK_31BIT == 0);
207 202 }
208 203 let mut flags = Flags::empty();
209 204 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
210 205 flags.set(Flags::P1_TRACKED, p1_tracked);
211 206 flags.set(Flags::P2_INFO, p2_info);
212 207 if let Some(exec) = fallback_exec {
213 208 flags.insert(Flags::HAS_FALLBACK_EXEC);
214 209 if exec {
215 210 flags.insert(Flags::FALLBACK_EXEC);
216 211 }
217 212 }
218 213 if let Some(exec) = fallback_symlink {
219 214 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
220 215 if exec {
221 216 flags.insert(Flags::FALLBACK_SYMLINK);
222 217 }
223 218 }
224 219 Self {
225 220 flags,
226 221 mode_size,
227 222 mtime,
228 223 }
229 224 }
230 225
    /// Converts a v1 `(state, mode, size, mtime)` quadruplet into the flag
    /// representation, decoding the magic `size`/`mtime` sentinel values.
    pub fn from_v1_data(
        state: EntryState,
        mode: i32,
        size: i32,
        mtime: i32,
    ) -> Self {
        match state {
            EntryState::Normal => {
                if size == SIZE_FROM_OTHER_PARENT {
                    Self {
                        // might be missing P1_TRACKED
                        flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
                        mode_size: None,
                        mtime: None,
                    }
                } else if size == SIZE_NON_NORMAL {
                    Self {
                        flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                        mode_size: None,
                        mtime: None,
                    }
                } else if mtime == MTIME_UNSET {
                    // TODO: return an error for negative values?
                    let mode = u32::try_from(mode).unwrap();
                    let size = u32::try_from(size).unwrap();
                    Self {
                        flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                        mode_size: Some((mode, size)),
                        mtime: None,
                    }
                } else {
                    // TODO: return an error for negative values?
                    let mode = u32::try_from(mode).unwrap();
                    let size = u32::try_from(size).unwrap();
                    let mtime = u32::try_from(mtime).unwrap();
                    // v1 only stores whole seconds: sub-second precision
                    // is zero here.
                    let mtime =
                        TruncatedTimestamp::from_already_truncated(mtime, 0)
                            .unwrap();
                    Self {
                        flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                        mode_size: Some((mode, size)),
                        mtime: Some(mtime),
                    }
                }
            }
            EntryState::Added => Self {
                flags: Flags::WDIR_TRACKED,
                mode_size: None,
                mtime: None,
            },
            EntryState::Removed => Self {
                flags: if size == SIZE_NON_NORMAL {
                    Flags::P1_TRACKED | Flags::P2_INFO
                } else if size == SIZE_FROM_OTHER_PARENT {
                    // We don’t know if P1_TRACKED should be set (file history)
                    Flags::P2_INFO
                } else {
                    Flags::P1_TRACKED
                },
                mode_size: None,
                mtime: None,
            },
            EntryState::Merged => Self {
                flags: Flags::WDIR_TRACKED
                    | Flags::P1_TRACKED // might not be true because of rename ?
                    | Flags::P2_INFO, // might not be true because of rename ?
                mode_size: None,
                mtime: None,
            },
        }
    }
302 297
    /// Creates a new entry in "removed" state.
    ///
    /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
    /// `SIZE_FROM_OTHER_PARENT`
    pub fn new_removed(size: i32) -> Self {
        Self::from_v1_data(EntryState::Removed, 0, size, 0)
    }
310 305
    /// Whether the file is tracked in the working directory.
    pub fn tracked(&self) -> bool {
        self.flags.contains(Flags::WDIR_TRACKED)
    }

    /// Whether the file is tracked in the first parent.
    pub fn p1_tracked(&self) -> bool {
        self.flags.contains(Flags::P1_TRACKED)
    }

    /// Whether at least one of the two parents knows about this file.
    fn in_either_parent(&self) -> bool {
        self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
    }

    /// Known to a parent but no longer tracked in the working directory.
    pub fn removed(&self) -> bool {
        self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED)
    }

    /// Tracked in the working directory *and* carrying p2 merge information.
    pub fn p2_info(&self) -> bool {
        self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
    }

    /// Tracked in the working directory but in neither parent.
    pub fn added(&self) -> bool {
        self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent()
    }
334 329
335 330 pub fn maybe_clean(&self) -> bool {
336 331 if !self.flags.contains(Flags::WDIR_TRACKED) {
337 332 false
338 333 } else if !self.flags.contains(Flags::P1_TRACKED) {
339 334 false
340 335 } else if self.flags.contains(Flags::P2_INFO) {
341 336 false
342 337 } else {
343 338 true
344 339 }
345 340 }
346 341
    /// Whether the working directory or either parent knows about this file.
    pub fn any_tracked(&self) -> bool {
        self.flags.intersects(
            Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
        )
    }
352 347
353 348 /// Returns `(wdir_tracked, p1_tracked, p2_info, mode_size, mtime)`
354 349 pub(crate) fn v2_data(
355 350 &self,
356 351 ) -> (
357 352 bool,
358 353 bool,
359 354 bool,
360 355 Option<(u32, u32)>,
361 356 Option<TruncatedTimestamp>,
362 357 Option<bool>,
363 358 Option<bool>,
364 359 ) {
365 360 if !self.any_tracked() {
366 361 // TODO: return an Option instead?
367 362 panic!("Accessing v1_state of an untracked DirstateEntry")
368 363 }
369 364 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
370 365 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
371 366 let p2_info = self.flags.contains(Flags::P2_INFO);
372 367 let mode_size = self.mode_size;
373 368 let mtime = self.mtime;
374 369 (
375 370 wdir_tracked,
376 371 p1_tracked,
377 372 p2_info,
378 373 mode_size,
379 374 mtime,
380 375 self.get_fallback_exec(),
381 376 self.get_fallback_symlink(),
382 377 )
383 378 }
384 379
    /// Folds the flags back into the v1 `EntryState`.
    ///
    /// Panics when `!self.any_tracked()`; callers must check first.
    fn v1_state(&self) -> EntryState {
        if !self.any_tracked() {
            // TODO: return an Option instead?
            panic!("Accessing v1_state of an untracked DirstateEntry")
        }
        if self.removed() {
            EntryState::Removed
        } else if self
            .flags
            .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO)
        {
            EntryState::Merged
        } else if self.added() {
            EntryState::Added
        } else {
            EntryState::Normal
        }
    }
403 398
404 399 fn v1_mode(&self) -> i32 {
405 400 if let Some((mode, _size)) = self.mode_size {
406 401 i32::try_from(mode).unwrap()
407 402 } else {
408 403 0
409 404 }
410 405 }
411 406
412 407 fn v1_size(&self) -> i32 {
413 408 if !self.any_tracked() {
414 409 // TODO: return an Option instead?
415 410 panic!("Accessing v1_size of an untracked DirstateEntry")
416 411 }
417 412 if self.removed()
418 413 && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
419 414 {
420 415 SIZE_NON_NORMAL
421 416 } else if self.flags.contains(Flags::P2_INFO) {
422 417 SIZE_FROM_OTHER_PARENT
423 418 } else if self.removed() {
424 419 0
425 420 } else if self.added() {
426 421 SIZE_NON_NORMAL
427 422 } else if let Some((_mode, size)) = self.mode_size {
428 423 i32::try_from(size).unwrap()
429 424 } else {
430 425 SIZE_NON_NORMAL
431 426 }
432 427 }
433 428
434 429 fn v1_mtime(&self) -> i32 {
435 430 if !self.any_tracked() {
436 431 // TODO: return an Option instead?
437 432 panic!("Accessing v1_mtime of an untracked DirstateEntry")
438 433 }
439 434 if self.removed() {
440 435 0
441 436 } else if self.flags.contains(Flags::P2_INFO) {
442 437 MTIME_UNSET
443 438 } else if !self.flags.contains(Flags::P1_TRACKED) {
444 439 MTIME_UNSET
445 440 } else if let Some(mtime) = self.mtime {
446 441 i32::try_from(mtime.truncated_seconds()).unwrap()
447 442 } else {
448 443 MTIME_UNSET
449 444 }
450 445 }
451 446
452 447 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
453 448 pub fn state(&self) -> EntryState {
454 449 self.v1_state()
455 450 }
456 451
457 452 // TODO: return Option?
458 453 pub fn mode(&self) -> i32 {
459 454 self.v1_mode()
460 455 }
461 456
462 457 // TODO: return Option?
463 458 pub fn size(&self) -> i32 {
464 459 self.v1_size()
465 460 }
466 461
467 462 // TODO: return Option?
468 463 pub fn mtime(&self) -> i32 {
469 464 self.v1_mtime()
470 465 }
471 466
472 467 pub fn get_fallback_exec(&self) -> Option<bool> {
473 468 if self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
474 469 Some(self.flags.contains(Flags::FALLBACK_EXEC))
475 470 } else {
476 471 None
477 472 }
478 473 }
479 474
480 475 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
481 476 match value {
482 477 None => {
483 478 self.flags.remove(Flags::HAS_FALLBACK_EXEC);
484 479 self.flags.remove(Flags::FALLBACK_EXEC);
485 480 }
486 481 Some(exec) => {
487 482 self.flags.insert(Flags::HAS_FALLBACK_EXEC);
488 483 if exec {
489 484 self.flags.insert(Flags::FALLBACK_EXEC);
490 485 }
491 486 }
492 487 }
493 488 }
494 489
495 490 pub fn get_fallback_symlink(&self) -> Option<bool> {
496 491 if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
497 492 Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
498 493 } else {
499 494 None
500 495 }
501 496 }
502 497
503 498 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
504 499 match value {
505 500 None => {
506 501 self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
507 502 self.flags.remove(Flags::FALLBACK_SYMLINK);
508 503 }
509 504 Some(symlink) => {
510 505 self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
511 506 if symlink {
512 507 self.flags.insert(Flags::FALLBACK_SYMLINK);
513 508 }
514 509 }
515 510 }
516 511 }
517 512
518 513 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
519 514 self.mtime
520 515 }
521 516
522 517 pub fn drop_merge_data(&mut self) {
523 518 if self.flags.contains(Flags::P2_INFO) {
524 519 self.flags.remove(Flags::P2_INFO);
525 520 self.mode_size = None;
526 521 self.mtime = None;
527 522 }
528 523 }
529 524
530 525 pub fn set_possibly_dirty(&mut self) {
531 526 self.mtime = None
532 527 }
533 528
534 529 pub fn set_clean(
535 530 &mut self,
536 531 mode: u32,
537 532 size: u32,
538 533 mtime: TruncatedTimestamp,
539 534 ) {
540 535 let size = size & RANGE_MASK_31BIT;
541 536 self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
542 537 self.mode_size = Some((mode, size));
543 538 self.mtime = Some(mtime);
544 539 }
545 540
546 541 pub fn set_tracked(&mut self) {
547 542 self.flags.insert(Flags::WDIR_TRACKED);
548 543 // `set_tracked` is replacing various `normallookup` call. So we mark
549 544 // the files as needing lookup
550 545 //
551 546 // Consider dropping this in the future in favor of something less
552 547 // broad.
553 548 self.mtime = None;
554 549 }
555 550
556 551 pub fn set_untracked(&mut self) {
557 552 self.flags.remove(Flags::WDIR_TRACKED);
558 553 self.mode_size = None;
559 554 self.mtime = None;
560 555 }
561 556
562 557 /// Returns `(state, mode, size, mtime)` for the purpose of serialization
563 558 /// in the dirstate-v1 format.
564 559 ///
565 560 /// This includes marker values such as `mtime == -1`. In the future we may
566 561 /// want to not represent these cases that way in memory, but serialization
567 562 /// will need to keep the same format.
568 563 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
569 564 (
570 565 self.v1_state().into(),
571 566 self.v1_mode(),
572 567 self.v1_size(),
573 568 self.v1_mtime(),
574 569 )
575 570 }
576 571
577 572 pub(crate) fn is_from_other_parent(&self) -> bool {
578 573 self.state() == EntryState::Normal
579 574 && self.size() == SIZE_FROM_OTHER_PARENT
580 575 }
581 576
582 577 // TODO: other platforms
583 578 #[cfg(unix)]
584 579 pub fn mode_changed(
585 580 &self,
586 581 filesystem_metadata: &std::fs::Metadata,
587 582 ) -> bool {
588 583 use std::os::unix::fs::MetadataExt;
589 584 const EXEC_BIT_MASK: u32 = 0o100;
590 585 let dirstate_exec_bit = (self.mode() as u32) & EXEC_BIT_MASK;
591 586 let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
592 587 dirstate_exec_bit != fs_exec_bit
593 588 }
594 589
595 590 /// Returns a `(state, mode, size, mtime)` tuple as for
596 591 /// `DirstateMapMethods::debug_iter`.
597 592 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
598 593 (self.state().into(), self.mode(), self.size(), self.mtime())
599 594 }
600 595
601 596 /// True if the stored mtime would be ambiguous with the current time
602 597 pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
603 598 if let Some(mtime) = self.mtime {
604 599 self.state() == EntryState::Normal
605 600 && mtime.truncated_seconds() == now.truncated_seconds()
606 601 } else {
607 602 false
608 603 }
609 604 }
610 605 }
611 606
612 607 impl EntryState {
613 608 pub fn is_tracked(self) -> bool {
614 609 use EntryState::*;
615 610 match self {
616 611 Normal | Added | Merged => true,
617 612 Removed => false,
618 613 }
619 614 }
620 615 }
621 616
622 617 impl TryFrom<u8> for EntryState {
623 618 type Error = HgError;
624 619
625 620 fn try_from(value: u8) -> Result<Self, Self::Error> {
626 621 match value {
627 622 b'n' => Ok(EntryState::Normal),
628 623 b'a' => Ok(EntryState::Added),
629 624 b'r' => Ok(EntryState::Removed),
630 625 b'm' => Ok(EntryState::Merged),
631 626 _ => Err(HgError::CorruptedRepository(format!(
632 627 "Incorrect dirstate entry state {}",
633 628 value
634 629 ))),
635 630 }
636 631 }
637 632 }
638 633
639 634 impl Into<u8> for EntryState {
640 635 fn into(self) -> u8 {
641 636 match self {
642 637 EntryState::Normal => b'n',
643 638 EntryState::Added => b'a',
644 639 EntryState::Removed => b'r',
645 640 EntryState::Merged => b'm',
646 641 }
647 642 }
648 643 }
@@ -1,782 +1,774
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4 4
5 5 use crate::dirstate::TruncatedTimestamp;
6 6 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
7 7 use crate::dirstate_tree::path_with_basename::WithBasename;
8 8 use crate::errors::HgError;
9 9 use crate::utils::hg_path::HgPath;
10 10 use crate::DirstateEntry;
11 11 use crate::DirstateError;
12 12 use crate::DirstateParents;
13 13 use bitflags::bitflags;
14 14 use bytes_cast::unaligned::{U16Be, U32Be};
15 15 use bytes_cast::BytesCast;
16 16 use format_bytes::format_bytes;
17 17 use std::borrow::Cow;
18 18 use std::convert::{TryFrom, TryInto};
19 19
20 20 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
21 21 /// This is a redundant sanity check more than an actual "magic number" since
22 22 /// `.hg/requires` already governs which format should be used.
23 23 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
24 24
25 25 /// Keep space for 256-bit hashes
26 26 const STORED_NODE_ID_BYTES: usize = 32;
27 27
28 28 /// … even though only 160 bits are used for now, with SHA-1
29 29 const USED_NODE_ID_BYTES: usize = 20;
30 30
31 31 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
32 32 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
33 33
34 34 /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
35 35 const TREE_METADATA_SIZE: usize = 44;
36 36 const NODE_SIZE: usize = 44;
37 37
38 38 /// Make sure that size-affecting changes are made knowingly
39 39 #[allow(unused)]
40 40 fn static_assert_size_of() {
41 41 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
42 42 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
43 43 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
44 44 }
45 45
46 46 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
47 47 #[derive(BytesCast)]
48 48 #[repr(C)]
49 49 struct DocketHeader {
50 50 marker: [u8; V2_FORMAT_MARKER.len()],
51 51 parent_1: [u8; STORED_NODE_ID_BYTES],
52 52 parent_2: [u8; STORED_NODE_ID_BYTES],
53 53
54 54 metadata: TreeMetadata,
55 55
56 56 /// Counted in bytes
57 57 data_size: Size,
58 58
59 59 uuid_size: u8,
60 60 }
61 61
62 62 pub struct Docket<'on_disk> {
63 63 header: &'on_disk DocketHeader,
64 64 uuid: &'on_disk [u8],
65 65 }
66 66
67 67 /// Fields are documented in the *Tree metadata in the docket file*
68 68 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 struct TreeMetadata {
72 72 root_nodes: ChildNodes,
73 73 nodes_with_entry_count: Size,
74 74 nodes_with_copy_source_count: Size,
75 75 unreachable_bytes: Size,
76 76 unused: [u8; 4],
77 77
78 78 /// See *Optional hash of ignore patterns* section of
79 79 /// `mercurial/helptext/internals/dirstate-v2.txt`
80 80 ignore_patterns_hash: IgnorePatternsHash,
81 81 }
82 82
83 83 /// Fields are documented in the *The data file format*
84 84 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
85 85 #[derive(BytesCast)]
86 86 #[repr(C)]
87 87 pub(super) struct Node {
88 88 full_path: PathSlice,
89 89
90 90 /// In bytes from `self.full_path.start`
91 91 base_name_start: PathSize,
92 92
93 93 copy_source: OptPathSlice,
94 94 children: ChildNodes,
95 95 pub(super) descendants_with_entry_count: Size,
96 96 pub(super) tracked_descendants_count: Size,
97 97 flags: U16Be,
98 98 size: U32Be,
99 99 mtime: PackedTruncatedTimestamp,
100 100 }
101 101
102 102 bitflags! {
103 103 #[repr(C)]
104 104 struct Flags: u16 {
105 105 const WDIR_TRACKED = 1 << 0;
106 106 const P1_TRACKED = 1 << 1;
107 107 const P2_INFO = 1 << 2;
108 108 const HAS_MODE_AND_SIZE = 1 << 3;
109 109 const HAS_FILE_MTIME = 1 << 4;
110 110 const HAS_DIRECTORY_MTIME = 1 << 5;
111 111 const MODE_EXEC_PERM = 1 << 6;
112 112 const MODE_IS_SYMLINK = 1 << 7;
113 113 const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
114 114 const ALL_UNKNOWN_RECORDED = 1 << 9;
115 115 const ALL_IGNORED_RECORDED = 1 << 10;
116 116 const HAS_FALLBACK_EXEC = 1 << 11;
117 117 const FALLBACK_EXEC = 1 << 12;
118 118 const HAS_FALLBACK_SYMLINK = 1 << 13;
119 119 const FALLBACK_SYMLINK = 1 << 14;
120 120 const MTIME_SECOND_AMBIGUOUS = 1 << 15;
121 121 }
122 122 }
123 123
124 124 /// Duration since the Unix epoch
125 125 #[derive(BytesCast, Copy, Clone)]
126 126 #[repr(C)]
127 127 struct PackedTruncatedTimestamp {
128 128 truncated_seconds: U32Be,
129 129 nanoseconds: U32Be,
130 130 }
131 131
132 132 /// Counted in bytes from the start of the file
133 133 ///
134 134 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
135 135 type Offset = U32Be;
136 136
137 137 /// Counted in number of items
138 138 ///
139 139 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
140 140 type Size = U32Be;
141 141
142 142 /// Counted in bytes
143 143 ///
144 144 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
145 145 type PathSize = U16Be;
146 146
147 147 /// A contiguous sequence of `len` times `Node`, representing the child nodes
148 148 /// of either some other node or of the repository root.
149 149 ///
150 150 /// Always sorted by ascending `full_path`, to allow binary search.
151 151 /// Since nodes with the same parent nodes also have the same parent path,
152 152 /// only the `base_name`s need to be compared during binary search.
153 153 #[derive(BytesCast, Copy, Clone)]
154 154 #[repr(C)]
155 155 struct ChildNodes {
156 156 start: Offset,
157 157 len: Size,
158 158 }
159 159
160 160 /// A `HgPath` of `len` bytes
161 161 #[derive(BytesCast, Copy, Clone)]
162 162 #[repr(C)]
163 163 struct PathSlice {
164 164 start: Offset,
165 165 len: PathSize,
166 166 }
167 167
168 168 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
169 169 type OptPathSlice = PathSlice;
170 170
171 171 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
172 172 ///
173 173 /// This should only happen if Mercurial is buggy or a repository is corrupted.
174 174 #[derive(Debug)]
175 175 pub struct DirstateV2ParseError;
176 176
177 177 impl From<DirstateV2ParseError> for HgError {
178 178 fn from(_: DirstateV2ParseError) -> Self {
179 179 HgError::corrupted("dirstate-v2 parse error")
180 180 }
181 181 }
182 182
183 183 impl From<DirstateV2ParseError> for crate::DirstateError {
184 184 fn from(error: DirstateV2ParseError) -> Self {
185 185 HgError::from(error).into()
186 186 }
187 187 }
188 188
189 189 impl<'on_disk> Docket<'on_disk> {
190 190 pub fn parents(&self) -> DirstateParents {
191 191 use crate::Node;
192 192 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
193 193 .unwrap()
194 194 .clone();
195 195 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
196 196 .unwrap()
197 197 .clone();
198 198 DirstateParents { p1, p2 }
199 199 }
200 200
201 201 pub fn tree_metadata(&self) -> &[u8] {
202 202 self.header.metadata.as_bytes()
203 203 }
204 204
205 205 pub fn data_size(&self) -> usize {
206 206 // This `unwrap` could only panic on a 16-bit CPU
207 207 self.header.data_size.get().try_into().unwrap()
208 208 }
209 209
210 210 pub fn data_filename(&self) -> String {
211 211 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
212 212 }
213 213 }
214 214
215 215 pub fn read_docket(
216 216 on_disk: &[u8],
217 217 ) -> Result<Docket<'_>, DirstateV2ParseError> {
218 218 let (header, uuid) =
219 219 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
220 220 let uuid_size = header.uuid_size as usize;
221 221 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
222 222 Ok(Docket { header, uuid })
223 223 } else {
224 224 Err(DirstateV2ParseError)
225 225 }
226 226 }
227 227
228 228 pub(super) fn read<'on_disk>(
229 229 on_disk: &'on_disk [u8],
230 230 metadata: &[u8],
231 231 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
232 232 if on_disk.is_empty() {
233 233 return Ok(DirstateMap::empty(on_disk));
234 234 }
235 235 let (meta, _) = TreeMetadata::from_bytes(metadata)
236 236 .map_err(|_| DirstateV2ParseError)?;
237 237 let dirstate_map = DirstateMap {
238 238 on_disk,
239 239 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
240 240 on_disk,
241 241 meta.root_nodes,
242 242 )?),
243 243 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
244 244 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
245 245 ignore_patterns_hash: meta.ignore_patterns_hash,
246 246 unreachable_bytes: meta.unreachable_bytes.get(),
247 247 };
248 248 Ok(dirstate_map)
249 249 }
250 250
251 251 impl Node {
252 252 pub(super) fn full_path<'on_disk>(
253 253 &self,
254 254 on_disk: &'on_disk [u8],
255 255 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
256 256 read_hg_path(on_disk, self.full_path)
257 257 }
258 258
259 259 pub(super) fn base_name_start<'on_disk>(
260 260 &self,
261 261 ) -> Result<usize, DirstateV2ParseError> {
262 262 let start = self.base_name_start.get();
263 263 if start < self.full_path.len.get() {
264 264 let start = usize::try_from(start)
265 265 // u32 -> usize, could only panic on a 16-bit CPU
266 266 .expect("dirstate-v2 base_name_start out of bounds");
267 267 Ok(start)
268 268 } else {
269 269 Err(DirstateV2ParseError)
270 270 }
271 271 }
272 272
273 273 pub(super) fn base_name<'on_disk>(
274 274 &self,
275 275 on_disk: &'on_disk [u8],
276 276 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
277 277 let full_path = self.full_path(on_disk)?;
278 278 let base_name_start = self.base_name_start()?;
279 279 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
280 280 }
281 281
282 282 pub(super) fn path<'on_disk>(
283 283 &self,
284 284 on_disk: &'on_disk [u8],
285 285 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
286 286 Ok(WithBasename::from_raw_parts(
287 287 Cow::Borrowed(self.full_path(on_disk)?),
288 288 self.base_name_start()?,
289 289 ))
290 290 }
291 291
292 292 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
293 293 self.copy_source.start.get() != 0
294 294 }
295 295
296 296 pub(super) fn copy_source<'on_disk>(
297 297 &self,
298 298 on_disk: &'on_disk [u8],
299 299 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
300 300 Ok(if self.has_copy_source() {
301 301 Some(read_hg_path(on_disk, self.copy_source)?)
302 302 } else {
303 303 None
304 304 })
305 305 }
306 306
307 307 fn flags(&self) -> Flags {
308 308 Flags::from_bits_truncate(self.flags.get())
309 309 }
310 310
311 311 fn has_entry(&self) -> bool {
312 312 self.flags().intersects(
313 313 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
314 314 )
315 315 }
316 316
317 317 pub(super) fn node_data(
318 318 &self,
319 319 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
320 320 if self.has_entry() {
321 321 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
322 322 } else if let Some(mtime) = self.cached_directory_mtime()? {
323 323 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
324 324 } else {
325 325 Ok(dirstate_map::NodeData::None)
326 326 }
327 327 }
328 328
329 329 pub(super) fn cached_directory_mtime(
330 330 &self,
331 331 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
332 332 // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
333 333 // ignore the mtime if the flag is set.
334 334 if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
335 335 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
336 336 {
337 337 if self.flags().contains(Flags::HAS_FILE_MTIME) {
338 338 Err(DirstateV2ParseError)
339 339 } else {
340 340 Ok(Some(self.mtime.try_into()?))
341 341 }
342 342 } else {
343 343 Ok(None)
344 344 }
345 345 }
346 346
347 347 fn synthesize_unix_mode(&self) -> u32 {
348 348 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
349 349 libc::S_IFLNK
350 350 } else {
351 351 libc::S_IFREG
352 352 };
353 353 let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
354 354 0o755
355 355 } else {
356 356 0o644
357 357 };
358 358 file_type | permisions
359 359 }
360 360
361 361 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
362 362 // TODO: convert through raw bits instead?
363 363 let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
364 364 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
365 365 let p2_info = self.flags().contains(Flags::P2_INFO);
366 366 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
367 367 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
368 368 {
369 369 Some((self.synthesize_unix_mode(), self.size.into()))
370 370 } else {
371 371 None
372 372 };
373 373 let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
374 374 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
375 375 // The current code is not able to do the more subtle comparison that the
376 376 // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
377 377 && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
378 378 {
379 // TODO: replace this by `self.mtime.try_into()?` to use
380 // sub-second precision from the file.
381 // We don’t do this yet because other parts of the code
382 // always set it to zero.
383 let mtime = TruncatedTimestamp::from_already_truncated(
384 self.mtime.truncated_seconds.get(),
385 0,
386 )?;
387 Some(mtime)
379 Some(self.mtime.try_into()?)
388 380 } else {
389 381 None
390 382 };
391 383 Ok(DirstateEntry::from_v2_data(
392 384 wdir_tracked,
393 385 p1_tracked,
394 386 p2_info,
395 387 mode_size,
396 388 mtime,
397 389 None,
398 390 None,
399 391 ))
400 392 }
401 393
402 394 pub(super) fn entry(
403 395 &self,
404 396 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
405 397 if self.has_entry() {
406 398 Ok(Some(self.assume_entry()?))
407 399 } else {
408 400 Ok(None)
409 401 }
410 402 }
411 403
412 404 pub(super) fn children<'on_disk>(
413 405 &self,
414 406 on_disk: &'on_disk [u8],
415 407 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
416 408 read_nodes(on_disk, self.children)
417 409 }
418 410
419 411 pub(super) fn to_in_memory_node<'on_disk>(
420 412 &self,
421 413 on_disk: &'on_disk [u8],
422 414 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
423 415 Ok(dirstate_map::Node {
424 416 children: dirstate_map::ChildNodes::OnDisk(
425 417 self.children(on_disk)?,
426 418 ),
427 419 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
428 420 data: self.node_data()?,
429 421 descendants_with_entry_count: self
430 422 .descendants_with_entry_count
431 423 .get(),
432 424 tracked_descendants_count: self.tracked_descendants_count.get(),
433 425 })
434 426 }
435 427
436 428 fn from_dirstate_entry(
437 429 entry: &DirstateEntry,
438 430 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
439 431 let (
440 432 wdir_tracked,
441 433 p1_tracked,
442 434 p2_info,
443 435 mode_size_opt,
444 436 mtime_opt,
445 437 fallback_exec,
446 438 fallback_symlink,
447 439 ) = entry.v2_data();
448 440 // TODO: convert through raw flag bits instead?
449 441 let mut flags = Flags::empty();
450 442 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
451 443 flags.set(Flags::P1_TRACKED, p1_tracked);
452 444 flags.set(Flags::P2_INFO, p2_info);
453 445 let size = if let Some((m, s)) = mode_size_opt {
454 446 let exec_perm = m & libc::S_IXUSR != 0;
455 447 let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
456 448 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
457 449 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
458 450 flags.insert(Flags::HAS_MODE_AND_SIZE);
459 451 s.into()
460 452 } else {
461 453 0.into()
462 454 };
463 455 let mtime = if let Some(m) = mtime_opt {
464 456 flags.insert(Flags::HAS_FILE_MTIME);
465 457 m.into()
466 458 } else {
467 459 PackedTruncatedTimestamp::null()
468 460 };
469 461 if let Some(f_exec) = fallback_exec {
470 462 flags.insert(Flags::HAS_FALLBACK_EXEC);
471 463 if f_exec {
472 464 flags.insert(Flags::FALLBACK_EXEC);
473 465 }
474 466 }
475 467 if let Some(f_symlink) = fallback_symlink {
476 468 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
477 469 if f_symlink {
478 470 flags.insert(Flags::FALLBACK_SYMLINK);
479 471 }
480 472 }
481 473 (flags, size, mtime)
482 474 }
483 475 }
484 476
485 477 fn read_hg_path(
486 478 on_disk: &[u8],
487 479 slice: PathSlice,
488 480 ) -> Result<&HgPath, DirstateV2ParseError> {
489 481 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
490 482 }
491 483
492 484 fn read_nodes(
493 485 on_disk: &[u8],
494 486 slice: ChildNodes,
495 487 ) -> Result<&[Node], DirstateV2ParseError> {
496 488 read_slice(on_disk, slice.start, slice.len.get())
497 489 }
498 490
499 491 fn read_slice<T, Len>(
500 492 on_disk: &[u8],
501 493 start: Offset,
502 494 len: Len,
503 495 ) -> Result<&[T], DirstateV2ParseError>
504 496 where
505 497 T: BytesCast,
506 498 Len: TryInto<usize>,
507 499 {
508 500 // Either `usize::MAX` would result in "out of bounds" error since a single
509 501 // `&[u8]` cannot occupy the entire address space.
510 502 let start = start.get().try_into().unwrap_or(std::usize::MAX);
511 503 let len = len.try_into().unwrap_or(std::usize::MAX);
512 504 on_disk
513 505 .get(start..)
514 506 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
515 507 .map(|(slice, _rest)| slice)
516 508 .ok_or_else(|| DirstateV2ParseError)
517 509 }
518 510
519 511 pub(crate) fn for_each_tracked_path<'on_disk>(
520 512 on_disk: &'on_disk [u8],
521 513 metadata: &[u8],
522 514 mut f: impl FnMut(&'on_disk HgPath),
523 515 ) -> Result<(), DirstateV2ParseError> {
524 516 let (meta, _) = TreeMetadata::from_bytes(metadata)
525 517 .map_err(|_| DirstateV2ParseError)?;
526 518 fn recur<'on_disk>(
527 519 on_disk: &'on_disk [u8],
528 520 nodes: ChildNodes,
529 521 f: &mut impl FnMut(&'on_disk HgPath),
530 522 ) -> Result<(), DirstateV2ParseError> {
531 523 for node in read_nodes(on_disk, nodes)? {
532 524 if let Some(entry) = node.entry()? {
533 525 if entry.state().is_tracked() {
534 526 f(node.full_path(on_disk)?)
535 527 }
536 528 }
537 529 recur(on_disk, node.children, f)?
538 530 }
539 531 Ok(())
540 532 }
541 533 recur(on_disk, meta.root_nodes, &mut f)
542 534 }
543 535
544 536 /// Returns new data and metadata, together with whether that data should be
545 537 /// appended to the existing data file whose content is at
546 538 /// `dirstate_map.on_disk` (true), instead of written to a new data file
547 539 /// (false).
548 540 pub(super) fn write(
549 541 dirstate_map: &mut DirstateMap,
550 542 can_append: bool,
551 543 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
552 544 let append = can_append && dirstate_map.write_should_append();
553 545
554 546 // This ignores the space for paths, and for nodes without an entry.
555 547 // TODO: better estimate? Skip the `Vec` and write to a file directly?
556 548 let size_guess = std::mem::size_of::<Node>()
557 549 * dirstate_map.nodes_with_entry_count as usize;
558 550
559 551 let mut writer = Writer {
560 552 dirstate_map,
561 553 append,
562 554 out: Vec::with_capacity(size_guess),
563 555 };
564 556
565 557 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
566 558
567 559 let meta = TreeMetadata {
568 560 root_nodes,
569 561 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
570 562 nodes_with_copy_source_count: dirstate_map
571 563 .nodes_with_copy_source_count
572 564 .into(),
573 565 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
574 566 unused: [0; 4],
575 567 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
576 568 };
577 569 Ok((writer.out, meta.as_bytes().to_vec(), append))
578 570 }
579 571
580 572 struct Writer<'dmap, 'on_disk> {
581 573 dirstate_map: &'dmap DirstateMap<'on_disk>,
582 574 append: bool,
583 575 out: Vec<u8>,
584 576 }
585 577
586 578 impl Writer<'_, '_> {
587 579 fn write_nodes(
588 580 &mut self,
589 581 nodes: dirstate_map::ChildNodesRef,
590 582 ) -> Result<ChildNodes, DirstateError> {
591 583 // Reuse already-written nodes if possible
592 584 if self.append {
593 585 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
594 586 let start = self.on_disk_offset_of(nodes_slice).expect(
595 587 "dirstate-v2 OnDisk nodes not found within on_disk",
596 588 );
597 589 let len = child_nodes_len_from_usize(nodes_slice.len());
598 590 return Ok(ChildNodes { start, len });
599 591 }
600 592 }
601 593
602 594 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
603 595 // undefined iteration order. Sort to enable binary search in the
604 596 // written file.
605 597 let nodes = nodes.sorted();
606 598 let nodes_len = nodes.len();
607 599
608 600 // First accumulate serialized nodes in a `Vec`
609 601 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
610 602 for node in nodes {
611 603 let children =
612 604 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
613 605 let full_path = node.full_path(self.dirstate_map.on_disk)?;
614 606 let full_path = self.write_path(full_path.as_bytes());
615 607 let copy_source = if let Some(source) =
616 608 node.copy_source(self.dirstate_map.on_disk)?
617 609 {
618 610 self.write_path(source.as_bytes())
619 611 } else {
620 612 PathSlice {
621 613 start: 0.into(),
622 614 len: 0.into(),
623 615 }
624 616 };
625 617 on_disk_nodes.push(match node {
626 618 NodeRef::InMemory(path, node) => {
627 619 let (flags, size, mtime) = match &node.data {
628 620 dirstate_map::NodeData::Entry(entry) => {
629 621 Node::from_dirstate_entry(entry)
630 622 }
631 623 dirstate_map::NodeData::CachedDirectory { mtime } => (
632 624 // we currently never set a mtime if unknown file
633 625 // are present.
634 626 // So if we have a mtime for a directory, we know
635 627 // there are no unknown
636 628 // files and we
637 629 // blindly set ALL_UNKNOWN_RECORDED.
638 630 //
639 631 // We never set ALL_IGNORED_RECORDED since we
640 632 // don't track that case
641 633 // currently.
642 634 Flags::HAS_DIRECTORY_MTIME
643 635 | Flags::ALL_UNKNOWN_RECORDED,
644 636 0.into(),
645 637 (*mtime).into(),
646 638 ),
647 639 dirstate_map::NodeData::None => (
648 640 Flags::empty(),
649 641 0.into(),
650 642 PackedTruncatedTimestamp::null(),
651 643 ),
652 644 };
653 645 Node {
654 646 children,
655 647 copy_source,
656 648 full_path,
657 649 base_name_start: u16::try_from(path.base_name_start())
658 650 // Could only panic for paths over 64 KiB
659 651 .expect("dirstate-v2 path length overflow")
660 652 .into(),
661 653 descendants_with_entry_count: node
662 654 .descendants_with_entry_count
663 655 .into(),
664 656 tracked_descendants_count: node
665 657 .tracked_descendants_count
666 658 .into(),
667 659 flags: flags.bits().into(),
668 660 size,
669 661 mtime,
670 662 }
671 663 }
672 664 NodeRef::OnDisk(node) => Node {
673 665 children,
674 666 copy_source,
675 667 full_path,
676 668 ..*node
677 669 },
678 670 })
679 671 }
680 672 // … so we can write them contiguously, after writing everything else
681 673 // they refer to.
682 674 let start = self.current_offset();
683 675 let len = child_nodes_len_from_usize(nodes_len);
684 676 self.out.extend(on_disk_nodes.as_bytes());
685 677 Ok(ChildNodes { start, len })
686 678 }
687 679
688 680 /// If the given slice of items is within `on_disk`, returns its offset
689 681 /// from the start of `on_disk`.
690 682 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
691 683 where
692 684 T: BytesCast,
693 685 {
694 686 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
695 687 let start = slice.as_ptr() as usize;
696 688 let end = start + slice.len();
697 689 start..=end
698 690 }
699 691 let slice_addresses = address_range(slice.as_bytes());
700 692 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
701 693 if on_disk_addresses.contains(slice_addresses.start())
702 694 && on_disk_addresses.contains(slice_addresses.end())
703 695 {
704 696 let offset = slice_addresses.start() - on_disk_addresses.start();
705 697 Some(offset_from_usize(offset))
706 698 } else {
707 699 None
708 700 }
709 701 }
710 702
711 703 fn current_offset(&mut self) -> Offset {
712 704 let mut offset = self.out.len();
713 705 if self.append {
714 706 offset += self.dirstate_map.on_disk.len()
715 707 }
716 708 offset_from_usize(offset)
717 709 }
718 710
719 711 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
720 712 let len = path_len_from_usize(slice.len());
721 713 // Reuse an already-written path if possible
722 714 if self.append {
723 715 if let Some(start) = self.on_disk_offset_of(slice) {
724 716 return PathSlice { start, len };
725 717 }
726 718 }
727 719 let start = self.current_offset();
728 720 self.out.extend(slice.as_bytes());
729 721 PathSlice { start, len }
730 722 }
731 723 }
732 724
733 725 fn offset_from_usize(x: usize) -> Offset {
734 726 u32::try_from(x)
735 727 // Could only panic for a dirstate file larger than 4 GiB
736 728 .expect("dirstate-v2 offset overflow")
737 729 .into()
738 730 }
739 731
740 732 fn child_nodes_len_from_usize(x: usize) -> Size {
741 733 u32::try_from(x)
742 734 // Could only panic with over 4 billion nodes
743 735 .expect("dirstate-v2 slice length overflow")
744 736 .into()
745 737 }
746 738
747 739 fn path_len_from_usize(x: usize) -> PathSize {
748 740 u16::try_from(x)
749 741 // Could only panic for paths over 64 KiB
750 742 .expect("dirstate-v2 path length overflow")
751 743 .into()
752 744 }
753 745
754 746 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
755 747 fn from(timestamp: TruncatedTimestamp) -> Self {
756 748 Self {
757 749 truncated_seconds: timestamp.truncated_seconds().into(),
758 750 nanoseconds: timestamp.nanoseconds().into(),
759 751 }
760 752 }
761 753 }
762 754
763 755 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
764 756 type Error = DirstateV2ParseError;
765 757
766 758 fn try_from(
767 759 timestamp: PackedTruncatedTimestamp,
768 760 ) -> Result<Self, Self::Error> {
769 761 Self::from_already_truncated(
770 762 timestamp.truncated_seconds.get(),
771 763 timestamp.nanoseconds.get(),
772 764 )
773 765 }
774 766 }
775 767 impl PackedTruncatedTimestamp {
776 768 fn null() -> Self {
777 769 Self {
778 770 truncated_seconds: 0.into(),
779 771 nanoseconds: 0.into(),
780 772 }
781 773 }
782 774 }
@@ -1,758 +1,756
1 1 use crate::dirstate::entry::TruncatedTimestamp;
2 2 use crate::dirstate::status::IgnoreFnType;
3 3 use crate::dirstate_tree::dirstate_map::BorrowedPath;
4 4 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
5 5 use crate::dirstate_tree::dirstate_map::DirstateMap;
6 6 use crate::dirstate_tree::dirstate_map::NodeData;
7 7 use crate::dirstate_tree::dirstate_map::NodeRef;
8 8 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
9 9 use crate::matchers::get_ignore_function;
10 10 use crate::matchers::Matcher;
11 11 use crate::utils::files::get_bytes_from_os_string;
12 12 use crate::utils::files::get_path_from_bytes;
13 13 use crate::utils::hg_path::HgPath;
14 14 use crate::BadMatch;
15 15 use crate::DirstateStatus;
16 16 use crate::EntryState;
17 17 use crate::HgPathBuf;
18 18 use crate::PatternFileWarning;
19 19 use crate::StatusError;
20 20 use crate::StatusOptions;
21 21 use micro_timer::timed;
22 22 use rayon::prelude::*;
23 23 use sha1::{Digest, Sha1};
24 24 use std::borrow::Cow;
25 25 use std::io;
26 26 use std::path::Path;
27 27 use std::path::PathBuf;
28 28 use std::sync::Mutex;
29 29 use std::time::SystemTime;
30 30
/// Returns the status of the working directory compared to its parent
/// changeset.
///
/// This algorithm is based on traversing the filesystem tree (`fs` in function
/// and variable names) and dirstate tree at the same time. The core of this
/// traversal is the recursive `traverse_fs_directory_and_dirstate` function
/// and its use of `itertools::merge_join_by`. When reaching a path that only
/// exists in one of the two trees, depending on information requested by
/// `options` we may need to traverse the remaining subtree.
#[timed]
pub fn status<'tree, 'on_disk: 'tree>(
    dmap: &'tree mut DirstateMap<'on_disk>,
    matcher: &(dyn Matcher + Sync),
    root_dir: PathBuf,
    ignore_files: Vec<PathBuf>,
    options: StatusOptions,
) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
    // Only parse ignore files when ignored or unknown files are requested.
    // While parsing, hash the pattern bytes so we can detect (and record in
    // the dirstate) whether the patterns changed since the previous run.
    let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
        if options.list_ignored || options.list_unknown {
            let mut hasher = Sha1::new();
            let (ignore_fn, warnings) = get_ignore_function(
                ignore_files,
                &root_dir,
                &mut |pattern_bytes| hasher.update(pattern_bytes),
            )?;
            let new_hash = *hasher.finalize().as_ref();
            let changed = new_hash != dmap.ignore_patterns_hash;
            dmap.ignore_patterns_hash = new_hash;
            (ignore_fn, warnings, Some(changed))
        } else {
            // Ignore patterns are irrelevant for this run: treat every
            // path as "ignored" so no unknown/ignored file is listed.
            (Box::new(|&_| true), vec![], None)
        };

    let common = StatusCommon {
        dmap,
        options,
        matcher,
        ignore_fn,
        outcome: Default::default(),
        ignore_patterns_have_changed: patterns_changed,
        new_cachable_directories: Default::default(),
        outated_cached_directories: Default::default(),
        filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
    };
    let is_at_repo_root = true;
    let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
    let has_ignored_ancestor = false;
    let root_cached_mtime = None;
    let root_dir_metadata = None;
    // If the path we have for the repository root is a symlink, do follow it.
    // (As opposed to symlinks within the working directory which are not
    // followed, using `std::fs::symlink_metadata`.)
    common.traverse_fs_directory_and_dirstate(
        has_ignored_ancestor,
        dmap.root.as_ref(),
        hg_path,
        &root_dir,
        root_dir_metadata,
        root_cached_mtime,
        is_at_repo_root,
    )?;
    let mut outcome = common.outcome.into_inner().unwrap();
    let new_cachable = common.new_cachable_directories.into_inner().unwrap();
    let outdated = common.outated_cached_directories.into_inner().unwrap();

    // `dirty` records that this run changed what should be persisted:
    // a new ignore-pattern hash, or directory mtime cache entries that
    // were added or invalidated.
    outcome.dirty = common.ignore_patterns_have_changed == Some(true)
        || !outdated.is_empty()
        || !new_cachable.is_empty();

    // Remove outdated mtimes before adding new mtimes, in case a given
    // directory is both
    for path in &outdated {
        let node = dmap.get_or_insert(path)?;
        if let NodeData::CachedDirectory { .. } = &node.data {
            node.data = NodeData::None
        }
    }
    for (path, mtime) in &new_cachable {
        let node = dmap.get_or_insert(path)?;
        match &node.data {
            NodeData::Entry(_) => {} // Don’t overwrite an entry
            NodeData::CachedDirectory { .. } | NodeData::None => {
                node.data = NodeData::CachedDirectory { mtime: *mtime }
            }
        }
    }

    Ok((outcome, warnings))
}
120 120
/// Bag of random things needed by various parts of the algorithm. Reduces the
/// number of parameters passed to functions.
struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
    /// The dirstate tree being compared against the filesystem
    dmap: &'tree DirstateMap<'on_disk>,
    /// Which file categories the caller asked to list
    options: StatusOptions,
    /// Restricts which paths are reported
    matcher: &'a (dyn Matcher + Sync),
    /// Returns whether a given path is ignored
    ignore_fn: IgnoreFnType<'a>,
    /// Accumulated results; behind a mutex because traversal runs on
    /// multiple rayon threads
    outcome: Mutex<DirstateStatus<'on_disk>>,
    /// Directories whose `read_dir` results were found cacheable during
    /// this run, with the mtime to record for each
    new_cachable_directories:
        Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
    /// Directories whose previously-cached mtime must be dropped.
    /// NOTE: “outated” is a typo for “outdated”; renaming the field would
    /// touch all uses.
    outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,

    /// Whether ignore files like `.hgignore` have changed since the previous
    /// time a `status()` call wrote their hash to the dirstate. `None` means
    /// we don’t know as this run doesn’t list either ignored or unknown files
    /// and therefore isn’t reading `.hgignore`.
    ignore_patterns_have_changed: Option<bool>,

    /// The current time at the start of the `status()` algorithm, as measured
    /// and possibly truncated by the filesystem.
    filesystem_time_at_status_start: Option<SystemTime>,
}
143 143
144 144 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
    /// List the filesystem directory `fs_path` (which corresponds to
    /// `hg_path` inside the repository).
    ///
    /// On failure the I/O error is recorded via `io_error` and mapped to a
    /// unit error, so callers can treat the directory as empty.
    fn read_dir(
        &self,
        hg_path: &HgPath,
        fs_path: &Path,
        is_at_repo_root: bool,
    ) -> Result<Vec<DirEntry>, ()> {
        DirEntry::read_dir(fs_path, is_at_repo_root)
            .map_err(|error| self.io_error(error, hg_path))
    }
154 154
155 155 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
156 156 let errno = error.raw_os_error().expect("expected real OS error");
157 157 self.outcome
158 158 .lock()
159 159 .unwrap()
160 160 .bad
161 161 .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
162 162 }
163 163
    /// If the ignore patterns changed since the previous `status()` run,
    /// a cached directory mtime for this node may describe results computed
    /// with the old patterns: queue that cache entry for removal.
    fn check_for_outdated_directory_cache(
        &self,
        dirstate_node: &NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        if self.ignore_patterns_have_changed == Some(true)
            && dirstate_node.cached_directory_mtime()?.is_some()
        {
            self.outated_cached_directories.lock().unwrap().push(
                dirstate_node
                    .full_path_borrowed(self.dmap.on_disk)?
                    .detach_from_tree(),
            )
        }
        Ok(())
    }
179 179
180 180 /// If this returns true, we can get accurate results by only using
181 181 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
182 182 /// need to call `read_dir`.
183 183 fn can_skip_fs_readdir(
184 184 &self,
185 185 directory_metadata: Option<&std::fs::Metadata>,
186 186 cached_directory_mtime: Option<TruncatedTimestamp>,
187 187 ) -> bool {
188 188 if !self.options.list_unknown && !self.options.list_ignored {
189 189 // All states that we care about listing have corresponding
190 190 // dirstate entries.
191 191 // This happens for example with `hg status -mard`.
192 192 return true;
193 193 }
194 194 if !self.options.list_ignored
195 195 && self.ignore_patterns_have_changed == Some(false)
196 196 {
197 197 if let Some(cached_mtime) = cached_directory_mtime {
198 198 // The dirstate contains a cached mtime for this directory, set
199 199 // by a previous run of the `status` algorithm which found this
200 200 // directory eligible for `read_dir` caching.
201 201 if let Some(meta) = directory_metadata {
202 202 if cached_mtime
203 203 .likely_equal_to_mtime_of(meta)
204 204 .unwrap_or(false)
205 205 {
206 206 // The mtime of that directory has not changed
207 207 // since then, which means that the results of
208 208 // `read_dir` should also be unchanged.
209 209 return true;
210 210 }
211 211 }
212 212 }
213 213 }
214 214 false
215 215 }
216 216
    /// Traverse one directory level, merging dirstate children with
    /// filesystem entries (both sorted, merged with `merge_join_by`).
    ///
    /// Returns whether all child entries of the filesystem directory have a
    /// corresponding dirstate node or are ignored.
    fn traverse_fs_directory_and_dirstate(
        &self,
        has_ignored_ancestor: bool,
        dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
        directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
        directory_fs_path: &Path,
        directory_metadata: Option<&std::fs::Metadata>,
        cached_directory_mtime: Option<TruncatedTimestamp>,
        is_at_repo_root: bool,
    ) -> Result<bool, DirstateV2ParseError> {
        if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
        {
            // Fast path: stat only the paths the dirstate knows about
            // (in parallel) instead of listing the whole directory.
            dirstate_nodes
                .par_iter()
                .map(|dirstate_node| {
                    let fs_path = directory_fs_path.join(get_path_from_bytes(
                        dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
                    ));
                    match std::fs::symlink_metadata(&fs_path) {
                        Ok(fs_metadata) => self.traverse_fs_and_dirstate(
                            &fs_path,
                            &fs_metadata,
                            dirstate_node,
                            has_ignored_ancestor,
                        ),
                        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                            // Tracked but absent from the filesystem
                            self.traverse_dirstate_only(dirstate_node)
                        }
                        Err(error) => {
                            let hg_path =
                                dirstate_node.full_path(self.dmap.on_disk)?;
                            Ok(self.io_error(error, hg_path))
                        }
                    }
                })
                .collect::<Result<_, _>>()?;

            // We don’t know, so conservatively say this isn’t the case
            let children_all_have_dirstate_node_or_are_ignored = false;

            return Ok(children_all_have_dirstate_node_or_are_ignored);
        }

        let mut fs_entries = if let Ok(entries) = self.read_dir(
            directory_hg_path,
            directory_fs_path,
            is_at_repo_root,
        ) {
            entries
        } else {
            // Treat an unreadable directory (typically because of insufficient
            // permissions) like an empty directory. `self.read_dir` has
            // already called `self.io_error` so a warning will be emitted.
            Vec::new()
        };

        // `merge_join_by` requires both its input iterators to be sorted:

        let dirstate_nodes = dirstate_nodes.sorted();
        // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
        // https://github.com/rust-lang/rust/issues/34162
        fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));

        // Propagate here any error that would happen inside the comparison
        // callback below
        for dirstate_node in &dirstate_nodes {
            dirstate_node.base_name(self.dmap.on_disk)?;
        }
        itertools::merge_join_by(
            dirstate_nodes,
            &fs_entries,
            |dirstate_node, fs_entry| {
                // This `unwrap` never panics because we already propagated
                // those errors above
                dirstate_node
                    .base_name(self.dmap.on_disk)
                    .unwrap()
                    .cmp(&fs_entry.base_name)
            },
        )
        .par_bridge()
        .map(|pair| {
            use itertools::EitherOrBoth::*;
            let has_dirstate_node_or_is_ignored;
            match pair {
                Both(dirstate_node, fs_entry) => {
                    // Present in both trees: full comparison
                    self.traverse_fs_and_dirstate(
                        &fs_entry.full_path,
                        &fs_entry.metadata,
                        dirstate_node,
                        has_ignored_ancestor,
                    )?;
                    has_dirstate_node_or_is_ignored = true
                }
                Left(dirstate_node) => {
                    // Only in the dirstate
                    self.traverse_dirstate_only(dirstate_node)?;
                    has_dirstate_node_or_is_ignored = true;
                }
                Right(fs_entry) => {
                    // Only on the filesystem
                    has_dirstate_node_or_is_ignored = self.traverse_fs_only(
                        has_ignored_ancestor,
                        directory_hg_path,
                        fs_entry,
                    )
                }
            }
            Ok(has_dirstate_node_or_is_ignored)
        })
        .try_reduce(|| true, |a, b| Ok(a && b))
    }
329 329
    /// Compare one path that exists both in the dirstate tree and on the
    /// filesystem, recursing into directories.
    fn traverse_fs_and_dirstate(
        &self,
        fs_path: &Path,
        fs_metadata: &std::fs::Metadata,
        dirstate_node: NodeRef<'tree, 'on_disk>,
        has_ignored_ancestor: bool,
    ) -> Result<(), DirstateV2ParseError> {
        self.check_for_outdated_directory_cache(&dirstate_node)?;
        let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
        let file_type = fs_metadata.file_type();
        let file_or_symlink = file_type.is_file() || file_type.is_symlink();
        if !file_or_symlink {
            // If we previously had a file here, it was removed (with
            // `hg rm` or similar) or deleted before it could be
            // replaced by a directory or something else.
            self.mark_removed_or_deleted_if_file(
                &hg_path,
                dirstate_node.state()?,
            );
        }
        if file_type.is_dir() {
            if self.options.collect_traversed_dirs {
                self.outcome
                    .lock()
                    .unwrap()
                    .traversed
                    .push(hg_path.detach_from_tree())
            }
            let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
            let is_at_repo_root = false;
            let children_all_have_dirstate_node_or_are_ignored = self
                .traverse_fs_directory_and_dirstate(
                    is_ignored,
                    dirstate_node.children(self.dmap.on_disk)?,
                    hg_path,
                    fs_path,
                    Some(fs_metadata),
                    dirstate_node.cached_directory_mtime()?,
                    is_at_repo_root,
                )?;
            self.maybe_save_directory_mtime(
                children_all_have_dirstate_node_or_are_ignored,
                fs_metadata,
                dirstate_node,
            )?
        } else {
            if file_or_symlink && self.matcher.matches(hg_path) {
                if let Some(state) = dirstate_node.state()? {
                    // Classify by the state recorded in the dirstate
                    match state {
                        EntryState::Added => self
                            .outcome
                            .lock()
                            .unwrap()
                            .added
                            .push(hg_path.detach_from_tree()),
                        EntryState::Removed => self
                            .outcome
                            .lock()
                            .unwrap()
                            .removed
                            .push(hg_path.detach_from_tree()),
                        EntryState::Merged => self
                            .outcome
                            .lock()
                            .unwrap()
                            .modified
                            .push(hg_path.detach_from_tree()),
                        EntryState::Normal => self
                            .handle_normal_file(&dirstate_node, fs_metadata)?,
                    }
                } else {
                    // `node.entry.is_none()` indicates a "directory"
                    // node, but the filesystem has a file
                    self.mark_unknown_or_ignored(
                        has_ignored_ancestor,
                        hg_path,
                    );
                }
            }

            // The filesystem has a file here, so any dirstate children have
            // no corresponding filesystem entry
            for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
            {
                self.traverse_dirstate_only(child_node)?
            }
        }
        Ok(())
    }
417 417
    /// Decide whether it is safe to cache this directory’s `read_dir`
    /// results (keyed by its mtime) in the dirstate, and queue a new cache
    /// entry if so.
    fn maybe_save_directory_mtime(
        &self,
        children_all_have_dirstate_node_or_are_ignored: bool,
        directory_metadata: &std::fs::Metadata,
        dirstate_node: NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        if children_all_have_dirstate_node_or_are_ignored {
            // All filesystem directory entries from `read_dir` have a
            // corresponding node in the dirstate, so we can reconstitute the
            // names of those entries without calling `read_dir` again.
            if let (Some(status_start), Ok(directory_mtime)) = (
                &self.filesystem_time_at_status_start,
                directory_metadata.modified(),
            ) {
                // Although the Rust standard library’s `SystemTime` type
                // has nanosecond precision, the times reported for a
                // directory’s (or file’s) modified time may have lower
                // resolution based on the filesystem (for example ext3
                // only stores integer seconds), kernel (see
                // https://stackoverflow.com/a/14393315/1162888), etc.
                if &directory_mtime >= status_start {
                    // The directory was modified too recently, don’t cache its
                    // `read_dir` results.
                    //
                    // A timeline like this is possible:
                    //
                    // 1. A change to this directory (direct child was
                    //    added or removed) caused its mtime to be set
                    //    (possibly truncated) to `directory_mtime`
                    // 2. This `status` algorithm calls `read_dir`
                    // 3. Another change is made to the same directory so
                    //    that calling `read_dir` again would give different
                    //    results, but soon enough after 1. that the mtime
                    //    stays the same
                    //
                    // On a system where the time resolution is poor, this
                    // scenario is not unlikely if all three steps are caused
                    // by the same script.
                } else {
                    // We’ve observed (through `status_start`) that time has
                    // “progressed” since `directory_mtime`, so any further
                    // change to this directory is extremely likely to cause a
                    // different mtime.
                    //
                    // Having the same mtime again is not entirely impossible
                    // since the system clock is not monotonic. It could jump
                    // backward to some point before `directory_mtime`, then a
                    // directory change could potentially happen during exactly
                    // the wrong tick.
                    //
                    // We deem this scenario (unlike the previous one) to be
                    // unlikely enough in practice.
                    let truncated = TruncatedTimestamp::from(directory_mtime);
                    let is_up_to_date = if let Some(cached) =
                        dirstate_node.cached_directory_mtime()?
                    {
                        cached.likely_equal(truncated)
                    } else {
                        false
                    };
                    if !is_up_to_date {
                        let hg_path = dirstate_node
                            .full_path_borrowed(self.dmap.on_disk)?
                            .detach_from_tree();
                        self.new_cachable_directories
                            .lock()
                            .unwrap()
                            .push((hg_path, truncated))
                    }
                }
            }
        }
        Ok(())
    }
492 492
493 493 /// A file with `EntryState::Normal` in the dirstate was found in the
494 494 /// filesystem
495 495 fn handle_normal_file(
496 496 &self,
497 497 dirstate_node: &NodeRef<'tree, 'on_disk>,
498 498 fs_metadata: &std::fs::Metadata,
499 499 ) -> Result<(), DirstateV2ParseError> {
500 500 // Keep the low 31 bits
501 501 fn truncate_u64(value: u64) -> i32 {
502 502 (value & 0x7FFF_FFFF) as i32
503 503 }
504 504
505 505 let entry = dirstate_node
506 506 .entry()?
507 507 .expect("handle_normal_file called with entry-less node");
508 508 let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
509 509 let mode_changed =
510 510 || self.options.check_exec && entry.mode_changed(fs_metadata);
511 511 let size = entry.size();
512 512 let size_changed = size != truncate_u64(fs_metadata.len());
513 513 if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
514 514 // issue6456: Size returned may be longer due to encryption
515 515 // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
516 516 self.outcome
517 517 .lock()
518 518 .unwrap()
519 519 .unsure
520 520 .push(hg_path.detach_from_tree())
521 521 } else if dirstate_node.has_copy_source()
522 522 || entry.is_from_other_parent()
523 523 || (size >= 0 && (size_changed || mode_changed()))
524 524 {
525 525 self.outcome
526 526 .lock()
527 527 .unwrap()
528 528 .modified
529 529 .push(hg_path.detach_from_tree())
530 530 } else {
531 531 let mtime_looks_clean;
532 532 if let Some(dirstate_mtime) = entry.truncated_mtime() {
533 533 let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
534 .expect("OS/libc does not support mtime?")
535 // For now don’t use sub-second precision for file mtimes
536 .to_integer_second();
534 .expect("OS/libc does not support mtime?");
537 535 mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
538 536 && !fs_mtime.likely_equal(self.options.last_normal_time)
539 537 } else {
540 538 // No mtime in the dirstate entry
541 539 mtime_looks_clean = false
542 540 };
543 541 if !mtime_looks_clean {
544 542 self.outcome
545 543 .lock()
546 544 .unwrap()
547 545 .unsure
548 546 .push(hg_path.detach_from_tree())
549 547 } else if self.options.list_clean {
550 548 self.outcome
551 549 .lock()
552 550 .unwrap()
553 551 .clean
554 552 .push(hg_path.detach_from_tree())
555 553 }
556 554 }
557 555 Ok(())
558 556 }
559 557
    /// A node in the dirstate tree has no corresponding filesystem entry:
    /// report it (and, recursively, its children) via
    /// `mark_removed_or_deleted_if_file`.
    fn traverse_dirstate_only(
        &self,
        dirstate_node: NodeRef<'tree, 'on_disk>,
    ) -> Result<(), DirstateV2ParseError> {
        self.check_for_outdated_directory_cache(&dirstate_node)?;
        self.mark_removed_or_deleted_if_file(
            &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
            dirstate_node.state()?,
        );
        // Descendants have no filesystem entry either: recurse in parallel
        dirstate_node
            .children(self.dmap.on_disk)?
            .par_iter()
            .map(|child_node| self.traverse_dirstate_only(child_node))
            .collect()
    }
576 574
577 575 /// A node in the dirstate tree has no corresponding *file* on the
578 576 /// filesystem
579 577 ///
580 578 /// Does nothing on a "directory" node
581 579 fn mark_removed_or_deleted_if_file(
582 580 &self,
583 581 hg_path: &BorrowedPath<'tree, 'on_disk>,
584 582 dirstate_node_state: Option<EntryState>,
585 583 ) {
586 584 if let Some(state) = dirstate_node_state {
587 585 if self.matcher.matches(hg_path) {
588 586 if let EntryState::Removed = state {
589 587 self.outcome
590 588 .lock()
591 589 .unwrap()
592 590 .removed
593 591 .push(hg_path.detach_from_tree())
594 592 } else {
595 593 self.outcome
596 594 .lock()
597 595 .unwrap()
598 596 .deleted
599 597 .push(hg_path.detach_from_tree())
600 598 }
601 599 }
602 600 }
603 601 }
604 602
    /// Something in the filesystem has no corresponding dirstate node
    ///
    /// Returns whether that path is ignored
    fn traverse_fs_only(
        &self,
        has_ignored_ancestor: bool,
        directory_hg_path: &HgPath,
        fs_entry: &DirEntry,
    ) -> bool {
        let hg_path = directory_hg_path.join(&fs_entry.base_name);
        let file_type = fs_entry.metadata.file_type();
        let file_or_symlink = file_type.is_file() || file_type.is_symlink();
        if file_type.is_dir() {
            let is_ignored =
                has_ignored_ancestor || (self.ignore_fn)(&hg_path);
            let traverse_children = if is_ignored {
                // Descendants of an ignored directory are all ignored
                self.options.list_ignored
            } else {
                // Descendants of an unknown directory may be either unknown or
                // ignored
                self.options.list_unknown || self.options.list_ignored
            };
            if traverse_children {
                let is_at_repo_root = false;
                if let Ok(children_fs_entries) = self.read_dir(
                    &hg_path,
                    &fs_entry.full_path,
                    is_at_repo_root,
                ) {
                    // No dirstate node exists below this point, so a
                    // filesystem-only recursion is sufficient
                    children_fs_entries.par_iter().for_each(|child_fs_entry| {
                        self.traverse_fs_only(
                            is_ignored,
                            &hg_path,
                            child_fs_entry,
                        );
                    })
                }
            }
            if self.options.collect_traversed_dirs {
                self.outcome.lock().unwrap().traversed.push(hg_path.into())
            }
            is_ignored
        } else {
            if file_or_symlink {
                if self.matcher.matches(&hg_path) {
                    self.mark_unknown_or_ignored(
                        has_ignored_ancestor,
                        &BorrowedPath::InMemory(&hg_path),
                    )
                } else {
                    // We haven’t computed whether this path is ignored. It
                    // might not be, and a future run of status might have a
                    // different matcher that matches it. So treat it as not
                    // ignored. That is, inhibit readdir caching of the parent
                    // directory.
                    false
                }
            } else {
                // This is neither a directory, a plain file, or a symlink.
                // Treat it like an ignored file.
                true
            }
        }
    }
670 668
671 669 /// Returns whether that path is ignored
672 670 fn mark_unknown_or_ignored(
673 671 &self,
674 672 has_ignored_ancestor: bool,
675 673 hg_path: &BorrowedPath<'_, 'on_disk>,
676 674 ) -> bool {
677 675 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
678 676 if is_ignored {
679 677 if self.options.list_ignored {
680 678 self.outcome
681 679 .lock()
682 680 .unwrap()
683 681 .ignored
684 682 .push(hg_path.detach_from_tree())
685 683 }
686 684 } else {
687 685 if self.options.list_unknown {
688 686 self.outcome
689 687 .lock()
690 688 .unwrap()
691 689 .unknown
692 690 .push(hg_path.detach_from_tree())
693 691 }
694 692 }
695 693 is_ignored
696 694 }
697 695 }
698 696
/// One entry read from a directory on the filesystem.
struct DirEntry {
    /// File name only (no directory components), in repository path encoding
    base_name: HgPathBuf,
    /// Full filesystem path of the entry
    full_path: PathBuf,
    /// Metadata from `std::fs::DirEntry::metadata` (does not follow symlinks)
    metadata: std::fs::Metadata,
}
704 702
impl DirEntry {
    /// Returns **unsorted** entries in the given directory, with name and
    /// metadata.
    ///
    /// If a `.hg` sub-directory is encountered:
    ///
    /// * At the repository root, ignore that sub-directory
    /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty
    ///   list instead.
    fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
        let mut results = Vec::new();
        for entry in path.read_dir()? {
            let entry = entry?;
            // `std::fs::DirEntry::metadata` does not follow symlinks
            let metadata = entry.metadata()?;
            let name = get_bytes_from_os_string(entry.file_name());
            // FIXME don't do this when cached
            if name == b".hg" {
                if is_at_repo_root {
                    // Skip the repo’s own .hg (might be a symlink)
                    continue;
                } else if metadata.is_dir() {
                    // A .hg sub-directory at another location means a subrepo,
                    // skip it entirely.
                    return Ok(Vec::new());
                }
            }
            results.push(DirEntry {
                base_name: name.into(),
                full_path: entry.path(),
                metadata,
            })
        }
        Ok(results)
    }
}
740 738
741 739 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
742 740 /// of the give repository.
743 741 ///
744 742 /// This is similar to `SystemTime::now()`, with the result truncated to the
745 743 /// same time resolution as other files’ modification times. Using `.hg`
746 744 /// instead of the system’s default temporary directory (such as `/tmp`) makes
747 745 /// it more likely the temporary file is in the same disk partition as contents
748 746 /// of the working directory, which can matter since different filesystems may
749 747 /// store timestamps with different resolutions.
750 748 ///
751 749 /// This may fail, typically if we lack write permissions. In that case we
752 750 /// should continue the `status()` algoritm anyway and consider the current
753 751 /// date/time to be unknown.
754 752 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
755 753 tempfile::tempfile_in(repo_root.join(".hg"))?
756 754 .metadata()?
757 755 .modified()
758 756 }
General Comments 0
You need to be logged in to leave comments. Login now