upstream/mercurial-mirror Commit - r49082:83d0bd45

dirstate-v2: actually use sub-second mtime precision...

Simon Sapin -

r49082:83d0bd45 default

parent child

mercurial/dirstateutils/timestamp.py

0 +16 -6

             # Copyright Mercurial Contributors
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import functools
             import stat
             # Mask keeping the lower 31 bits of the seconds component.
             rangemask = 0x7FFFFFFF
             # NOTE: `functools.total_ordering` cannot fill anything in for this class:
             # `tuple` already provides every rich comparison, so the decorator considers
             # them all "defined". Every operator is therefore spelled out explicitly
             # below, otherwise `!=`, `<`, `<=` and `>=` would silently fall back to plain
             # tuple comparison and disagree with `__eq__` and `__gt__`.
             @functools.total_ordering
             class timestamp(tuple):
                 """
                 A Unix timestamp with optional nanoseconds precision,
                 modulo 2**31 seconds.
                 A 2-tuple containing:
                 `truncated_seconds`: seconds since the Unix epoch,
                 truncated to its lower 31 bits
                 `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`.
                 When this is zero, the sub-second precision is considered unknown.
                 All comparison operators follow the same rule: two timestamps with the
                 same seconds compare as equal when either of them has unknown (zero)
                 sub-second precision.
                 """
                 def __new__(cls, value):
                     truncated_seconds, subsec_nanos = value
                     value = (truncated_seconds & rangemask, subsec_nanos)
                     return super(timestamp, cls).__new__(cls, value)
                 def __eq__(self, other):
                     self_secs, self_subsec_nanos = self
                     other_secs, other_subsec_nanos = other
                     return self_secs == other_secs and (
                         self_subsec_nanos == other_subsec_nanos
                         or self_subsec_nanos == 0
                         or other_subsec_nanos == 0
                     )
                 def __ne__(self, other):
                     # Must be explicit: the inherited `tuple.__ne__` would compare the
                     # raw nanoseconds and contradict `__eq__`.
                     return not self.__eq__(other)
                 def __gt__(self, other):
                     self_secs, self_subsec_nanos = self
                     other_secs, other_subsec_nanos = other
                     if self_secs > other_secs:
                         return True
                     if self_secs < other_secs:
                         return False
                     if self_subsec_nanos == 0 or other_subsec_nanos == 0:
                         # they are considered equal, so not "greater than"
                         return False
                     return self_subsec_nanos > other_subsec_nanos
                 def __lt__(self, other):
                     self_secs, self_subsec_nanos = self
                     other_secs, other_subsec_nanos = other
                     if self_secs < other_secs:
                         return True
                     if self_secs > other_secs:
                         return False
                     if self_subsec_nanos == 0 or other_subsec_nanos == 0:
                         # they are considered equal, so not "less than"
                         return False
                     return self_subsec_nanos < other_subsec_nanos
                 def __ge__(self, other):
                     # Exactly one of "less", "equal", "greater" holds for any pair.
                     return not self.__lt__(other)
                 def __le__(self, other):
                     return not self.__gt__(other)
             def zero():
                 """
                 Build the `timestamp` for the Unix epoch: zero seconds, zero nanoseconds.
                 """
                 # Go through `tuple.__new__` directly, skipping `timestamp.__new__`.
                 epoch = (0, 0)
                 return tuple.__new__(timestamp, epoch)
             def mtime_of(stat_result):
                 """
                 Takes an `os.stat_result`-like object and returns a `timestamp` object
                 for its modification time.
                 Sub-second precision is read from the `st_mtime_ns` attribute when the
                 object has one. Otherwise the integer seconds stored at index
                 `stat.ST_MTIME` are used and the nanoseconds are left at zero, which
                 `timestamp` documents as "sub-second precision unknown".
                 """
-                # https://docs.python.org/2/library/os.html#os.stat_float_times
+                try:
-                # "For compatibility with older Python versions,
+                    # TODO: add this attribute to `osutil.stat` objects,
-                #  accessing stat_result as a tuple always returns integers."
+                    # see `mercurial/cext/osutil.c`.
-                secs = stat_result[stat.ST_MTIME]
+                    # This attribute is also not available on Python 2.
+                    nanos = stat_result.st_mtime_ns
+                except AttributeError:
+                    # https://docs.python.org/2/library/os.html#os.stat_float_times
+                    # "For compatibility with older Python versions,
+                    #  accessing stat_result as a tuple always returns integers."
+                    secs = stat_result[stat.ST_MTIME]
-                # For now
+                    subsec_nanos = 0
-                subsec_nanos = 0
+                else:
+                    billion = int(1e9)
+                    secs = nanos // billion
+                    subsec_nanos = nanos % billion
                 return timestamp((secs, subsec_nanos))

mercurial/dirstateutils/v2.py

0 +1 -4

             # v2.py - Pure-Python implementation of the dirstate-v2 file format
             #
             # Copyright Mercurial Contributors
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import struct
             from ..thirdparty import attr
             from .. import error, policy
             parsers = policy.importmod('parsers')
             # Must match the constant of the same name in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`
             TREE_METADATA_SIZE = 44
             NODE_SIZE = 44
             # Must match the `TreeMetadata` Rust struct in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
             #
             # All integers are big-endian (the `>` prefix of the format string).
             #
             # * 4 bytes: start offset of root nodes
             # * 4 bytes: number of root nodes
             # * 4 bytes: total number of nodes in the tree that have an entry
             # * 4 bytes: total number of nodes in the tree that have a copy source
             # * 4 bytes: number of bytes in the data file that are not used anymore
             # * 4 bytes: unused
             # * 20 bytes: SHA-1 hash of ignore patterns
             TREE_METADATA = struct.Struct('>LLLLL4s20s')
             # Must match the `Node` Rust struct in
             # `rust/hg-core/src/dirstate_tree/on_disk.rs`. See doc-comments there.
             #
             # All integers are big-endian (the `>` prefix of the format string).
             #
             # * 4 bytes: start offset of full path
             # * 2 bytes: length of the full path
             # * 2 bytes: length within the full path before its "base name"
             # * 4 bytes: start offset of the copy source if any, or zero for no copy source
             # * 2 bytes: length of the copy source if any, or unused
             # * 4 bytes: start offset of child nodes
             # * 4 bytes: number of child nodes
             # * 4 bytes: number of descendant nodes that have an entry
             # * 4 bytes: number of descendant nodes that have a "tracked" state
             # * 2 bytes: flags
             # * 4 bytes: expected size
             # * 4 bytes: mtime seconds
             # * 4 bytes: mtime nanoseconds
             NODE = struct.Struct('>LHHLHLLLLHlll')
             # Fail at import time if a format string drifts from its documented size.
             assert TREE_METADATA_SIZE == TREE_METADATA.size
             assert NODE_SIZE == NODE.size
             def parse_dirstate(map, copy_map, data, tree_metadata):
                 """Parse a full v2 dirstate from binary data into dictionaries.
                 - map: a {path: entry} mapping that will be filled
                 - copy_map: a {path: copy-source} mapping that will be filled
                 - data: a binary blob containing the v2 nodes data
                 - tree_metadata: a binary blob of the top level node (from the docket)
                 """
                 header = TREE_METADATA.unpack(tree_metadata)
                 # Only the location of the root nodes is needed here. The remaining
                 # fields (counters, unreachable bytes, padding, ignore patterns hash)
                 # are not used by this parser.
                 root_nodes_start = header[0]
                 root_nodes_len = header[1]
                 parse_nodes(map, copy_map, data, root_nodes_start, root_nodes_len)
             def parse_nodes(map, copy_map, data, start, len):
                 """parse <len> nodes from <data> starting at offset <start>
                 This is used by parse_dirstate to recursively fill `map` and `copy_map`.
                 All directory specific information is ignored and does not need any
                 processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
                 Child nodes are visited before the node itself, and a node whose item
                 reports `any_tracked` as false is not added to `map` or `copy_map`.
                 """
                 for i in range(len):
                     # Nodes are fixed-size records stored back to back.
                     node_start = start + NODE_SIZE * i
                     node_bytes = slice_with_len(data, node_start, NODE_SIZE)
                     (
                         path_start,
                         path_len,
                         _basename_start,
                         copy_source_start,
                         copy_source_len,
                         children_start,
                         children_count,
                         _descendants_with_entry_count,
                         _tracked_descendants_count,
                         flags,
                         size,
                         mtime_s,
-                        _mtime_ns,
+                        mtime_ns,
                     ) = NODE.unpack(node_bytes)
                     # Parse child nodes of this node recursively
                     parse_nodes(map, copy_map, data, children_start, children_count)
-                    # Don’t yet use sub-second precision if it exists in the file,
-                    # since other parts of the code still set it to zero.
-                    mtime_ns = 0
                     item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
                     if not item.any_tracked:
                         continue
                     path = slice_with_len(data, path_start, path_len)
                     map[path] = item
                     # A zero start offset means "no copy source" in the node layout.
                     if copy_source_start:
                         copy_map[path] = slice_with_len(
                             data, copy_source_start, copy_source_len
                         )
             def slice_with_len(data, start, len):
                 """Return the `len` items of `data` that begin at offset `start`."""
                 end = start + len
                 return data[start:end]
             @attr.s
             class Node(object):
                 # Full path of this node, as bytes
                 path = attr.ib()
                 # Dirstate entry of this node, or `None` when it has none
                 entry = attr.ib()
                 # The `Node` of the containing folder
                 parent = attr.ib(default=None)
                 # Number of direct children and the offset they were written at
                 children_count = attr.ib(default=0)
                 children_offset = attr.ib(default=0)
                 # Counters written as-is into the packed node
                 descendants_with_entry = attr.ib(default=0)
                 tracked_descendants = attr.ib(default=0)
                 def pack(self, copy_map, paths_offset):
                     """
                     Return the `NODE`-packed representation of this node.
                     `paths_offset` is the offset at which the path of this node starts.
                     The copy source taken from `copy_map`, if any, is expected to
                     follow the path immediately.
                     """
                     path = self.path
                     copy_source = copy_map.get(path)
                     if copy_source is None:
                         copy_source_start = 0
                         copy_source_len = 0
                     else:
                         copy_source_start = paths_offset + len(path)
                         copy_source_len = len(copy_source)
                     if self.entry is None:
                         # There are no mtime-cached directories in the Python
                         # implementation
                         flags = size = mtime_s = mtime_ns = 0
                     else:
                         flags, size, mtime_s, mtime_ns = self.entry.v2_data()
                     return NODE.pack(
                         paths_offset,
                         len(path),
                         path.rfind(b'/') + 1,  # 0 if rfind returns -1
                         copy_source_start,
                         copy_source_len,
                         self.children_offset,
                         self.children_count,
                         self.descendants_with_entry,
                         self.tracked_descendants,
                         flags,
                         size,
                         mtime_s,
                         mtime_ns,
                     )
             def pack_dirstate(map, copy_map, now):
                 """
                 Pack `map` and `copy_map` into the dirstate v2 binary format and return
                 a `(data, tree_metadata)` pair: the bytearray holding the paths and
                 nodes, and the packed `TREE_METADATA` describing the root nodes.
                 `now` is a timestamp of the current filesystem time used to detect race
                 conditions in writing the dirstate to disk, see inline comment.
                 The on-disk format expects a tree-like structure where the leaves are
                 written first (and sorted per-directory), going up levels until the root
                 node and writing that one to the docket. See more details on the on-disk
                 format in `mercurial/helptext/internals/dirstate-v2`.
                 Since both `map` and `copy_map` are flat dicts we need to figure out the
                 hierarchy. This algorithm does so without having to build the entire tree
                 in-memory: it only keeps the minimum number of nodes around to satisfy the
                 format.
                 # Algorithm explanation
                 This explanation does not talk about the different counters for tracked
                 descendants and storing the copies, but that work is pretty simple once this
                 algorithm is in place.
                 ## Building a subtree
                 First, sort `map`: this makes it so the leaves of the tree are contiguous
                 per directory (i.e. a/b/c and a/b/d will be next to each other in the list),
                 and enables us to use the ordering of folders to have a "cursor" of the
                 current folder we're in without ever going twice in the same branch of the
                 tree. The cursor is a node that remembers its parent and any information
                 relevant to the format (see the `Node` class), building the relevant part
                 of the tree lazily.
                 Then, for each file in `map`, move the cursor into the tree to the
                 corresponding folder of the file: for example, if the very first file
                 is "a/b/c", we start from `Node[""]`, create `Node["a"]` which points to
                 its parent `Node[""]`, then create `Node["a/b"]`, which points to its parent
                 `Node["a"]`. These nodes are kept around in a stack.
                 If the next file in `map` is in the same subtree ("a/b/d" or "a/b/e/f"), we
                 add it to the stack and keep looping with the same logic of creating the
                 tree nodes as needed. If however the next file in `map` is *not* in the same
                 subtree ("a/other", if we're still in the "a/b" folder), then we know that
                 the subtree we're in is complete.
                 ## Writing the subtree
                 We have the entire subtree in the stack, so we start writing it to disk
                 folder by folder. The way we write a folder is to pop the stack into a list
                 until the folder changes, then reverse this list of direct children (to
                 satisfy the format requirement that children be sorted). This process
                 repeats until we hit the "other" subtree.
                 An example:
                     a
                     dir1/b
                     dir1/c
                     dir2/dir3/d
                     dir2/dir3/e
                     dir2/f
                 Would have us:
                     - add to the stack until "dir2/dir3/e"
                     - realize that "dir2/f" is in a different subtree
                     - pop "dir2/dir3/e", "dir2/dir3/d", reverse them so they're sorted and
                       pack them since the next entry is "dir2/dir3"
                     - go back up to "dir2"
                     - add "dir2/f" to the stack
                     - realize we're done with the map
                     - pop "dir2/f", "dir2/dir3" from the stack, reverse and pack them
                     - go up to the root node, do the same to write "a", "dir1" and "dir2" in
                       that order
                 ## Special case for the root node
                 The root node is not serialized in the format, but its information is
                 written to the docket. Again, see more details on the on-disk format in
                 `mercurial/helptext/internals/dirstate-v2`.
                 """
                 data = bytearray()
                 root_nodes_start = 0
                 root_nodes_len = 0
                 nodes_with_entry_count = 0
                 nodes_with_copy_source_count = 0
                 # Will always be 0 since this implementation always re-writes everything
                 # to disk
                 unreachable_bytes = 0
                 unused = b'\x00' * 4
                 # This is an optimization that's only useful for the Rust implementation
                 ignore_patterns_hash = b'\x00' * 20
                 # An empty dirstate has no nodes at all: only the metadata is produced.
                 if len(map) == 0:
                     tree_metadata = TREE_METADATA.pack(
                         root_nodes_start,
                         root_nodes_len,
                         nodes_with_entry_count,
                         nodes_with_copy_source_count,
                         unreachable_bytes,
                         unused,
                         ignore_patterns_hash,
                     )
                     return data, tree_metadata
                 sorted_map = sorted(map.items(), key=lambda x: x[0])
                 # Use a stack so that we only have to remember the nodes we currently
                 # need, instead of building the entire tree in memory
                 stack = []
                 current_node = Node(b"", None)
                 stack.append(current_node)
                 # `index` starts at 1, so `sorted_map[index]` is the *next* entry.
                 for index, (path, entry) in enumerate(sorted_map, 1):
                     if entry.need_delay(now):
                         # The file was last modified "simultaneously" with the current
                         # write to dirstate (i.e. within the same second for file-
                         # systems with a granularity of 1 sec). This commonly happens
                         # for at least a couple of files on 'update'.
                         # The user could change the file without changing its size
                         # within the same second. Invalidate the file's mtime in
                         # dirstate, forcing future 'status' calls to compare the
                         # contents of the file if the size is the same. This prevents
                         # mistakenly treating such files as clean.
                         entry.set_possibly_dirty()
                     nodes_with_entry_count += 1
                     if path in copy_map:
                         nodes_with_copy_source_count += 1
                     current_folder = get_folder(path)
                     current_node = move_to_correct_node_in_tree(
                         current_folder, current_node, stack
                     )
                     current_node.children_count += 1
                     # Entries from `map` are never `None`
                     if entry.tracked:
                         current_node.tracked_descendants += 1
                     current_node.descendants_with_entry += 1
                     stack.append(Node(path, entry, current_node))
                     should_pack = True
                     next_path = None
                     if index < len(sorted_map):
                         # Determine if the next entry is in the same sub-tree, if so don't
                         # pack yet
                         next_path = sorted_map[index][0]
                         should_pack = not get_folder(next_path).startswith(current_folder)
                     if should_pack:
                         pack_directory_children(current_node, copy_map, data, stack)
                         while stack and current_node.path != b"":
                             # Go up the tree and write until we reach the folder of the next
                             # entry (if any, otherwise the root)
                             parent = current_node.parent
                             in_parent_folder_of_next_entry = next_path is not None and (
                                 get_folder(next_path).startswith(get_folder(stack[-1].path))
                             )
                             if parent is None or in_parent_folder_of_next_entry:
                                 break
                             pack_directory_children(parent, copy_map, data, stack)
                             current_node = parent
                 # Special case for the root node since we don't write it to disk, only its
                 # children to the docket
                 current_node = stack.pop()
                 assert current_node.path == b"", current_node.path
                 assert len(stack) == 0, len(stack)
                 tree_metadata = TREE_METADATA.pack(
                     current_node.children_offset,
                     current_node.children_count,
                     nodes_with_entry_count,
                     nodes_with_copy_source_count,
                     unreachable_bytes,
                     unused,
                     ignore_patterns_hash,
                 )
                 return data, tree_metadata
             def get_folder(path):
                 """
                 Return everything before the last `/` of `path`, or an empty bytes
                 string when `path` contains no `/` (a root path).
                 """
                 # `rpartition` yields an empty head when the separator is absent.
                 folder, _separator, _basename = path.rpartition(b'/')
                 return folder
             def move_to_correct_node_in_tree(target_folder, current_node, stack):
                 """
                 Move inside the dirstate node tree to the node corresponding to
                 `target_folder`, creating the missing nodes along the way if needed.
                 `current_node` is the node the cursor starts from. Every node created
                 on the way down is appended to `stack` and counted as a child of the
                 node it was created under.
                 Returns the node whose path equals `target_folder`.
                 """
                 while target_folder != current_node.path:
                     if target_folder.startswith(current_node.path):
                         # We need to go down a folder
                         # Name of the next path component below the current node
                         prefix = target_folder[len(current_node.path) :].lstrip(b'/')
                         subfolder_name = prefix.split(b'/', 1)[0]
                         if current_node.path:
                             subfolder_path = current_node.path + b'/' + subfolder_name
                         else:
                             # Directly under the root: no leading separator
                             subfolder_path = subfolder_name
                         next_node = stack[-1]
                         if next_node.path == target_folder:
                             # This folder is now a file and only contains removed entries
                             # merge with the last node
                             current_node = next_node
                         else:
                             current_node.children_count += 1
                             current_node = Node(subfolder_path, None, current_node)
                             stack.append(current_node)
                     else:
                         # We need to go up a folder
                         current_node = current_node.parent
                 return current_node
             def pack_directory_children(node, copy_map, data, stack):
                 """
                 Pop the direct children of `node` off `stack` and append their binary
                 representation to `data`, in sorted order.
                 The paths (and copy sources) of all children are written first, followed
                 by the fixed-size nodes as one contiguous run whose offset is recorded in
                 `node.children_offset`. The descendant counters of the children are
                 added to those of `node`.
                 """
                 popped = []
                 while True:
                     top = stack[-1]
                     if top.path == b"" or get_folder(top.path) != node.path:
                         break
                     popped.append(stack.pop())
                 if len(popped) == 0:
                     raise error.ProgrammingError(b"no direct children for %r" % node.path)
                 nodes_blob = bytearray()
                 # Children were popped last-to-first: walk them backwards to get the
                 # sorted order back. Pack the nodes but don't write them yet.
                 for child in reversed(popped):
                     nodes_blob.extend(
                         child.pack(copy_map=copy_map, paths_offset=len(data))
                     )
                     data.extend(child.path)
                     data.extend(copy_map.get(child.path, b""))
                     node.tracked_descendants += child.tracked_descendants
                     node.descendants_with_entry += child.descendants_with_entry
                 # The fixed-size child nodes go after all the paths, in one run
                 node.children_offset = len(data)
                 data.extend(nodes_blob)

rust/hg-core/src/dirstate/entry.rs

0 0 -5

             use crate::dirstate_tree::on_disk::DirstateV2ParseError;
             use crate::errors::HgError;
             use bitflags::bitflags;
             use std::convert::{TryFrom, TryInto};
             use std::fs;
             use std::io;
             use std::time::{SystemTime, UNIX_EPOCH};
             /// The state of a dirstate entry.
             ///
             /// NOTE(review): the variants presumably mirror the dirstate-v1 state
             /// characters (`n`, `a`, `r`, `m`). Confirm against the conversion code,
             /// which is outside this excerpt.
             #[derive(Copy, Clone, Debug, Eq, PartialEq)]
             pub enum EntryState {
                 Normal,
                 Added,
                 Removed,
                 Merged,
             }
             /// `size` and `mtime.seconds` are truncated to 31 bits.
             ///
             /// TODO: double-check status algorithm correctness for files
             /// larger than 2 GiB or modified after 2038.
             #[derive(Debug, Copy, Clone)]
             pub struct DirstateEntry {
                 /// Set of `Flags` bits describing this entry.
                 pub(crate) flags: Flags,
                 /// `(mode, size)` pair, if known. `from_v2_data` asserts that both
                 /// values fit in 31 bits.
                 mode_size: Option<(u32, u32)>,
                 /// Modification time recorded for this entry, if any.
                 mtime: Option<TruncatedTimestamp>,
             }
             bitflags! {
                 /// Boolean properties of a `DirstateEntry`, packed into one byte with
                 /// one bit per flag.
                 ///
                 /// NOTE(review): the `HAS_FALLBACK_*` bits presumably tell whether
                 /// the matching `FALLBACK_*` bit carries a value. Confirm against the
                 /// code that reads them.
                 pub(crate) struct Flags: u8 {
                     const WDIR_TRACKED = 1 << 0;
                     const P1_TRACKED = 1 << 1;
                     const P2_INFO = 1 << 2;
                     const HAS_FALLBACK_EXEC = 1 << 3;
                     const FALLBACK_EXEC = 1 << 4;
                     const HAS_FALLBACK_SYMLINK = 1 << 5;
                     const FALLBACK_SYMLINK = 1 << 6;
                 }
             }
             /// A Unix timestamp with nanoseconds precision, whose seconds component
             /// only keeps its lower 31 bits (see `new_truncate`).
             #[derive(Debug, Copy, Clone)]
             pub struct TruncatedTimestamp {
                 /// Seconds since the Unix epoch, masked to their lower 31 bits.
                 truncated_seconds: u32,
                 /// Always in the `0 .. 1_000_000_000` range.
                 nanoseconds: u32,
             }
             impl TruncatedTimestamp {
                 /// Builds a timestamp from a number of seconds that may lie outside of
                 /// the supported range, keeping only the lower 31 bits of `seconds`.
                 ///
                 /// # Panics
                 ///
                 /// Panics if `nanoseconds` is not below one billion.
                 pub fn new_truncate(seconds: i64, nanoseconds: u32) -> Self {
                     assert!(nanoseconds < NSEC_PER_SEC);
                     // The wrapping `as` cast is intended: only the low bits matter.
                     let truncated_seconds = (seconds as u32) & RANGE_MASK_31BIT;
                     Self {
                         truncated_seconds,
                         nanoseconds,
                     }
                 }
                 /// Builds a timestamp from components that are expected to already be
                 /// in range.
                 ///
                 /// # Errors
                 ///
                 /// Returns `DirstateV2ParseError` if `truncated_seconds` has its top
                 /// bit set or if `nanoseconds` is not below one billion.
                 pub fn from_already_truncated(
                     truncated_seconds: u32,
                     nanoseconds: u32,
                 ) -> Result<Self, DirstateV2ParseError> {
                     let seconds_in_range =
                         truncated_seconds & !RANGE_MASK_31BIT == 0;
                     let nanoseconds_in_range = nanoseconds < NSEC_PER_SEC;
                     if !(seconds_in_range && nanoseconds_in_range) {
                         return Err(DirstateV2ParseError);
                     }
                     Ok(Self {
                         truncated_seconds,
                         nanoseconds,
                     })
                 }
                 /// Returns the modification time recorded in `metadata`, truncated.
                 ///
                 /// On Unix the seconds and nanoseconds are read directly from the
                 /// metadata. Elsewhere the value goes through `SystemTime`, and an
                 /// error from `Metadata::modified` is returned as is.
                 pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> {
                     #[cfg(unix)]
                     {
                         use std::os::unix::fs::MetadataExt;
                         // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC`
                         // range
                         let subsec_nanos: u32 =
                             metadata.mtime_nsec().try_into().unwrap();
                         let whole_seconds = metadata.mtime();
                         Ok(Self::new_truncate(whole_seconds, subsec_nanos))
                     }
                     #[cfg(not(unix))]
                     {
                         let modified = metadata.modified()?;
                         Ok(Self::from(modified))
                     }
                 }
-                pub fn to_integer_second(mut self) -> Self {
-                    self.nanoseconds = 0;
-                    self
                 /// The lower 31 bits of the number of seconds since the epoch.
                 ///
                 /// Both `new_truncate` and `from_already_truncated` guarantee that the
                 /// top bit of the stored value is clear.
                 pub fn truncated_seconds(&self) -> u32 {
                     self.truncated_seconds
                 }
                 /// The sub-second component of this timestamp, in nanoseconds.
                 /// Always in the `0 .. 1_000_000_000` range.
                 ///
                 /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
                 ///
                 /// `likely_equal` ignores sub-second precision when this is zero.
                 pub fn nanoseconds(&self) -> u32 {
                     self.nanoseconds
                 }
                 /// Compares two timestamps modulo 2**31 seconds.
                 ///
                 /// A `true` result means that the values originally converted from
                 /// `SystemTime` or passed to `new_truncate` were very likely equal.
                 /// The only false positive is two values exactly a multiple of 2**31
                 /// seconds (around 68 years) apart, which is deemed very unlikely to
                 /// happen by chance, especially on filesystems with sub-second
                 /// precision.
                 ///
                 /// Truncation does not help someone who manipulates modification
                 /// times to make `hg status` lie: they could just as well set
                 /// exactly the expected timestamp.
                 ///
                 /// The sub-second part is ignored when it is zero on either side.
                 /// Some APIs return zero when they have no finer precision, so a
                 /// strict comparison between values from different sources would
                 /// produce many false negatives.
                 pub fn likely_equal(self, other: Self) -> bool {
                     if self.truncated_seconds != other.truncated_seconds {
                         return false;
                     }
                     let subsec_unknown =
                         self.nanoseconds == 0 || other.nanoseconds == 0;
                     subsec_unknown || self.nanoseconds == other.nanoseconds
                 }
                 /// Tells whether this timestamp is `likely_equal` to the modification
                 /// time found in `metadata`.
                 ///
                 /// # Errors
                 ///
                 /// Forwards any I/O error returned by `for_mtime_of`.
                 pub fn likely_equal_to_mtime_of(
                     self,
                     metadata: &fs::Metadata,
                 ) -> io::Result<bool> {
                     let on_disk = Self::for_mtime_of(metadata)?;
                     Ok(self.likely_equal(on_disk))
                 }
             }
             impl From<SystemTime> for TruncatedTimestamp {
                 /// Converts a `SystemTime`, which may be before the Unix epoch, into
                 /// a truncated timestamp whose nanoseconds always count forward
                 /// from the seconds component.
                 fn from(system_time: SystemTime) -> Self {
                     // On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
                     // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
                     // We want to effectively access its fields, but the Rust standard
                     // library does not expose them. The best we can do is:
                     let (seconds, nanoseconds) =
                         match system_time.duration_since(UNIX_EPOCH) {
                             Ok(since_epoch) => (
                                 since_epoch.as_secs() as i64,
                                 since_epoch.subsec_nanos(),
                             ),
                             Err(error) => {
                                 // `system_time` is before `UNIX_EPOCH`.
                                 // We need to undo this algorithm:
                                 // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41
                                 let before_epoch = error.duration();
                                 let whole_secs = before_epoch.as_secs() as i64;
                                 match before_epoch.subsec_nanos() {
                                     0 => (-whole_secs, 0),
                                     // For example if `system_time` was 4.3 seconds
                                     // before the Unix epoch we get a Duration that
                                     // represents `(-4, -0.3)` but we want
                                     // `(-5, +0.7)`:
                                     fraction => {
                                         (-1 - whole_secs, NSEC_PER_SEC - fraction)
                                     }
                                 }
                             }
                         };
                     Self::new_truncate(seconds, nanoseconds)
                 }
             }
             /// Number of nanoseconds in one second.
             const NSEC_PER_SEC: u32 = 1_000_000_000;
             /// Mask selecting the lower 31 bits of a `u32`.
             const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
             /// Marker value used in dirstate-v1 data when an entry has no usable
             /// mtime.
             pub const MTIME_UNSET: i32 = -1;
             /// A `DirstateEntry` with a size of `-2` means that it was merged from the
             /// other parent. This allows revert to pick the right status back during a
             /// merge.
             pub const SIZE_FROM_OTHER_PARENT: i32 = -2;
             /// A special value used for internal representation of special case in
             /// dirstate v1 format.
             pub const SIZE_NON_NORMAL: i32 = -1;
             impl DirstateEntry {
                 /// Builds an entry from the individual pieces of dirstate-v2 data.
                 ///
                 /// Each `fallback_*` argument is stored as a "has a value" flag plus a
                 /// flag holding the value itself.
                 ///
                 /// # Panics
                 ///
                 /// Panics if `mode_size` contains a mode or a size that does not fit
                 /// in 31 bits.
                 pub fn from_v2_data(
                     wdir_tracked: bool,
                     p1_tracked: bool,
                     p2_info: bool,
                     mode_size: Option<(u32, u32)>,
                     mtime: Option<TruncatedTimestamp>,
                     fallback_exec: Option<bool>,
                     fallback_symlink: Option<bool>,
                 ) -> Self {
                     if let Some((mode, size)) = mode_size {
                         // TODO: return an error for out of range values?
                         assert!(mode & !RANGE_MASK_31BIT == 0);
                         assert!(size & !RANGE_MASK_31BIT == 0);
                     }
                     let mut flags = Flags::empty();
                     flags.set(Flags::WDIR_TRACKED, wdir_tracked);
                     flags.set(Flags::P1_TRACKED, p1_tracked);
                     flags.set(Flags::P2_INFO, p2_info);
                     match fallback_exec {
                         Some(true) => flags
                             .insert(Flags::HAS_FALLBACK_EXEC | Flags::FALLBACK_EXEC),
                         Some(false) => flags.insert(Flags::HAS_FALLBACK_EXEC),
                         None => {}
                     }
                     match fallback_symlink {
                         Some(true) => flags.insert(
                             Flags::HAS_FALLBACK_SYMLINK | Flags::FALLBACK_SYMLINK,
                         ),
                         Some(false) => flags.insert(Flags::HAS_FALLBACK_SYMLINK),
                         None => {}
                     }
                     Self {
                         flags,
                         mode_size,
                         mtime,
                     }
                 }
                 pub fn from_v1_data(
                     state: EntryState,
                     mode: i32,
                     size: i32,
                     mtime: i32,
                 ) -> Self {
                     match state {
                         EntryState::Normal => {
                             if size == SIZE_FROM_OTHER_PARENT {
                                 Self {
                                     // might be missing P1_TRACKED
                                     flags: Flags::WDIR_TRACKED | Flags::P2_INFO,
                                     mode_size: None,
                                     mtime: None,
                                 }
                             } else if size == SIZE_NON_NORMAL {
                                 Self {
                                     flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                                     mode_size: None,
                                     mtime: None,
                                 }
                             } else if mtime == MTIME_UNSET {
                                 // TODO: return an error for negative values?
                                 let mode = u32::try_from(mode).unwrap();
                                 let size = u32::try_from(size).unwrap();
                                 Self {
                                     flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                                     mode_size: Some((mode, size)),
                                     mtime: None,
                                 }
                             } else {
                                 // TODO: return an error for negative values?
                                 let mode = u32::try_from(mode).unwrap();
                                 let size = u32::try_from(size).unwrap();
                                 let mtime = u32::try_from(mtime).unwrap();
                                 let mtime =
                                     TruncatedTimestamp::from_already_truncated(mtime, 0)
                                         .unwrap();
                                 Self {
                                     flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                                     mode_size: Some((mode, size)),
                                     mtime: Some(mtime),
                                 }
                             }
                         }
                         EntryState::Added => Self {
                             flags: Flags::WDIR_TRACKED,
                             mode_size: None,
                             mtime: None,
                         },
                         EntryState::Removed => Self {
                             flags: if size == SIZE_NON_NORMAL {
                                 Flags::P1_TRACKED | Flags::P2_INFO
                             } else if size == SIZE_FROM_OTHER_PARENT {
                                 // We don’t know if P1_TRACKED should be set (file history)
                                 Flags::P2_INFO
                             } else {
                                 Flags::P1_TRACKED
                             },
                             mode_size: None,
                             mtime: None,
                         },
                         EntryState::Merged => Self {
                             flags: Flags::WDIR_TRACKED
                                 | Flags::P1_TRACKED // might not be true because of rename ?
                                 | Flags::P2_INFO, // might not be true because of rename ?
                             mode_size: None,
                             mtime: None,
                         },
                     }
                 }
                 /// Creates a new entry in "removed" state.
                 ///
                 /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or
                 /// `SIZE_FROM_OTHER_PARENT`
                 pub fn new_removed(size: i32) -> Self {
                     // Mode and mtime are passed as zero.
                     let (mode, mtime) = (0, 0);
                     Self::from_v1_data(EntryState::Removed, mode, size, mtime)
                 }
                 /// Returns whether the `WDIR_TRACKED` flag is set.
                 pub fn tracked(&self) -> bool {
                     self.flags.contains(Flags::WDIR_TRACKED)
                 }
                 /// Returns whether the `P1_TRACKED` flag is set.
                 pub fn p1_tracked(&self) -> bool {
                     self.flags.contains(Flags::P1_TRACKED)
                 }
                 /// Returns whether at least one of `P1_TRACKED` or `P2_INFO` is set.
                 fn in_either_parent(&self) -> bool {
                     self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO)
                 }
                 /// Returns whether the entry is in "removed" state: `WDIR_TRACKED` is
                 /// unset while `P1_TRACKED` or `P2_INFO` is set.
                 pub fn removed(&self) -> bool {
                     let tracked_in_wdir = self.flags.contains(Flags::WDIR_TRACKED);
                     !tracked_in_wdir && self.in_either_parent()
                 }
                 /// Returns whether both the `WDIR_TRACKED` and `P2_INFO` flags are set.
                 pub fn p2_info(&self) -> bool {
                     self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO)
                 }
                 /// Returns whether the entry is in "added" state: `WDIR_TRACKED` is set
                 /// while neither `P1_TRACKED` nor `P2_INFO` is.
                 pub fn added(&self) -> bool {
                     let tracked_in_wdir = self.flags.contains(Flags::WDIR_TRACKED);
                     tracked_in_wdir && !self.in_either_parent()
                 }
                 /// Returns `true` only when `WDIR_TRACKED` and `P1_TRACKED` are both set
                 /// and `P2_INFO` is not.
                 pub fn maybe_clean(&self) -> bool {
                     let tracked_in_wdir = self.flags.contains(Flags::WDIR_TRACKED);
                     let tracked_in_p1 = self.flags.contains(Flags::P1_TRACKED);
                     let has_p2_info = self.flags.contains(Flags::P2_INFO);
                     tracked_in_wdir && tracked_in_p1 && !has_p2_info
                 }
                 /// Returns whether at least one of `WDIR_TRACKED`, `P1_TRACKED` or
                 /// `P2_INFO` is set.
                 pub fn any_tracked(&self) -> bool {
                     self.flags.intersects(
                         Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
                     )
                 }
                 /// Returns `(wdir_tracked, p1_tracked, p2_info, mode_size, mtime,
                 /// fallback_exec, fallback_symlink)`
                 ///
                 /// # Panics
                 ///
                 /// Panics if none of `WDIR_TRACKED`, `P1_TRACKED` or `P2_INFO` is set.
                 pub(crate) fn v2_data(
                     &self,
                 ) -> (
                     bool,
                     bool,
                     bool,
                     Option<(u32, u32)>,
                     Option<TruncatedTimestamp>,
                     Option<bool>,
                     Option<bool>,
                 ) {
                     if !self.any_tracked() {
                         // TODO: return an Option instead?
                         panic!("Accessing v2_data of an untracked DirstateEntry")
                     }
                     let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
                     let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
                     let p2_info = self.flags.contains(Flags::P2_INFO);
                     let mode_size = self.mode_size;
                     let mtime = self.mtime;
                     (
                         wdir_tracked,
                         p1_tracked,
                         p2_info,
                         mode_size,
                         mtime,
                         self.get_fallback_exec(),
                         self.get_fallback_symlink(),
                     )
                 }
                 /// Maps the flags of this entry to a dirstate-v1 `EntryState`.
                 ///
                 /// The checks are made in this order: removed, merged (all three of
                 /// `WDIR_TRACKED`, `P1_TRACKED` and `P2_INFO` set), added, and
                 /// otherwise normal.
                 ///
                 /// # Panics
                 ///
                 /// Panics if none of `WDIR_TRACKED`, `P1_TRACKED` or `P2_INFO` is set.
                 fn v1_state(&self) -> EntryState {
                     if !self.any_tracked() {
                         // TODO: return an Option instead?
                         panic!("Accessing v1_state of an untracked DirstateEntry")
                     }
                     if self.removed() {
                         return EntryState::Removed;
                     }
                     let merged_flags =
                         Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO;
                     if self.flags.contains(merged_flags) {
                         return EntryState::Merged;
                     }
                     if self.added() {
                         return EntryState::Added;
                     }
                     EntryState::Normal
                 }
                 fn v1_mode(&self) -> i32 {
                     if let Some((mode, _size)) = self.mode_size {
                         i32::try_from(mode).unwrap()
                     } else {
                     }
                 }
                 fn v1_size(&self) -> i32 {
                     if !self.any_tracked() {
                         // TODO: return an Option instead?
                         panic!("Accessing v1_size of an untracked DirstateEntry")
                     }
                     if self.removed()
                         && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO)
                     {
                         SIZE_NON_NORMAL
                     } else if self.flags.contains(Flags::P2_INFO) {
                         SIZE_FROM_OTHER_PARENT
                     } else if self.removed() {
                     } else if self.added() {
                         SIZE_NON_NORMAL
                     } else if let Some((_mode, size)) = self.mode_size {
                         i32::try_from(size).unwrap()
                     } else {
                         SIZE_NON_NORMAL
                     }
                 }
                 fn v1_mtime(&self) -> i32 {
                     if !self.any_tracked() {
                         // TODO: return an Option instead?
                         panic!("Accessing v1_mtime of an untracked DirstateEntry")
                     }
                     if self.removed() {
                     } else if self.flags.contains(Flags::P2_INFO) {
                         MTIME_UNSET
                     } else if !self.flags.contains(Flags::P1_TRACKED) {
                         MTIME_UNSET
                     } else if let Some(mtime) = self.mtime {
                         i32::try_from(mtime.truncated_seconds()).unwrap()
                     } else {
                         MTIME_UNSET
                     }
                 }
                 // TODO: return `Option<EntryState>`? None when `!self.any_tracked`
                 /// Returns the dirstate-v1 state of this entry, as computed by
                 /// `v1_state`.
                 pub fn state(&self) -> EntryState {
                     self.v1_state()
                 }
                 // TODO: return Option?
                 /// Returns the dirstate-v1 mode of this entry, as computed by `v1_mode`.
                 pub fn mode(&self) -> i32 {
                     self.v1_mode()
                 }
                 // TODO: return Option?
                 /// Returns the dirstate-v1 size of this entry, as computed by `v1_size`.
                 /// This may be one of the `SIZE_*` marker values.
                 pub fn size(&self) -> i32 {
                     self.v1_size()
                 }
                 // TODO: return Option?
                 /// Returns the dirstate-v1 mtime of this entry, as computed by
                 /// `v1_mtime`. This may be `MTIME_UNSET`.
                 pub fn mtime(&self) -> i32 {
                     self.v1_mtime()
                 }
                 /// Returns the fallback "exec" value, or `None` when
                 /// `HAS_FALLBACK_EXEC` is not set.
                 pub fn get_fallback_exec(&self) -> Option<bool> {
                     if !self.flags.contains(Flags::HAS_FALLBACK_EXEC) {
                         return None;
                     }
                     Some(self.flags.contains(Flags::FALLBACK_EXEC))
                 }
                 /// Sets or clears the fallback "exec" value.
                 ///
                 /// `None` clears both `HAS_FALLBACK_EXEC` and `FALLBACK_EXEC`.
                 /// `Some(exec)` sets `HAS_FALLBACK_EXEC` and makes `FALLBACK_EXEC`
                 /// match `exec`.
                 pub fn set_fallback_exec(&mut self, value: Option<bool>) {
                     match value {
                         None => {
                             self.flags.remove(Flags::HAS_FALLBACK_EXEC);
                             self.flags.remove(Flags::FALLBACK_EXEC);
                         }
                         Some(exec) => {
                             self.flags.insert(Flags::HAS_FALLBACK_EXEC);
                             // `set` also clears the bit when `exec` is false, so
                             // that a previous `Some(true)` does not stick.
                             self.flags.set(Flags::FALLBACK_EXEC, exec);
                         }
                     }
                 }
                 /// Returns the fallback "symlink" value, or `None` when
                 /// `HAS_FALLBACK_SYMLINK` is not set.
                 pub fn get_fallback_symlink(&self) -> Option<bool> {
                     if !self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) {
                         return None;
                     }
                     Some(self.flags.contains(Flags::FALLBACK_SYMLINK))
                 }
                 /// Sets or clears the fallback "symlink" value.
                 ///
                 /// `None` clears both `HAS_FALLBACK_SYMLINK` and `FALLBACK_SYMLINK`.
                 /// `Some(symlink)` sets `HAS_FALLBACK_SYMLINK` and makes
                 /// `FALLBACK_SYMLINK` match `symlink`.
                 pub fn set_fallback_symlink(&mut self, value: Option<bool>) {
                     match value {
                         None => {
                             self.flags.remove(Flags::HAS_FALLBACK_SYMLINK);
                             self.flags.remove(Flags::FALLBACK_SYMLINK);
                         }
                         Some(symlink) => {
                             self.flags.insert(Flags::HAS_FALLBACK_SYMLINK);
                             // `set` also clears the bit when `symlink` is false, so
                             // that a previous `Some(true)` does not stick.
                             self.flags.set(Flags::FALLBACK_SYMLINK, symlink);
                         }
                     }
                 }
                 /// Returns the recorded mtime, if any, without the dirstate-v1 marker
                 /// handling done by `v1_mtime`.
                 pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
                     self.mtime
                 }
                 /// If `P2_INFO` is set, clears it together with the recorded mode,
                 /// size and mtime. Does nothing otherwise.
                 pub fn drop_merge_data(&mut self) {
                     if !self.flags.contains(Flags::P2_INFO) {
                         return;
                     }
                     self.flags.remove(Flags::P2_INFO);
                     self.mode_size = None;
                     self.mtime = None;
                 }
                 /// Forgets the recorded mtime, leaving flags, mode and size untouched.
                 pub fn set_possibly_dirty(&mut self) {
                     self.mtime = None
                 }
                 /// Records `mode`, `size` and `mtime` for this entry and sets both
                 /// `WDIR_TRACKED` and `P1_TRACKED`.
                 ///
                 /// Only the lower 31 bits of `size` are kept.
                 pub fn set_clean(
                     &mut self,
                     mode: u32,
                     size: u32,
                     mtime: TruncatedTimestamp,
                 ) {
                     self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
                     self.mode_size = Some((mode, size & RANGE_MASK_31BIT));
                     self.mtime = Some(mtime);
                 }
                 /// Sets `WDIR_TRACKED` and forgets the recorded mtime.
                 pub fn set_tracked(&mut self) {
                     self.flags.insert(Flags::WDIR_TRACKED);
                     // `set_tracked` is replacing various `normallookup` calls, so we
                     // mark the file as needing lookup.
                     //
                     // Consider dropping this in the future in favor of something less
                     // broad.
                     self.mtime = None;
                 }
                 /// Clears `WDIR_TRACKED` and forgets the recorded mode, size and mtime.
                 /// The `P1_TRACKED` and `P2_INFO` flags are left as they are.
                 pub fn set_untracked(&mut self) {
                     self.flags.remove(Flags::WDIR_TRACKED);
                     self.mode_size = None;
                     self.mtime = None;
                 }
                 /// Returns `(state, mode, size, mtime)` for the purpose of serialization
                 /// in the dirstate-v1 format.
                 ///
                 /// This includes marker values such as `mtime == -1`. In the future we may
                 /// want to not represent these cases that way in memory, but serialization
                 /// will need to keep the same format.
                 pub fn v1_data(&self) -> (u8, i32, i32, i32) {
                     let state: u8 = self.v1_state().into();
                     let mode = self.v1_mode();
                     let size = self.v1_size();
                     let mtime = self.v1_mtime();
                     (state, mode, size, mtime)
                 }
                 /// Returns whether the entry is in `Normal` state with a dirstate-v1
                 /// size of `SIZE_FROM_OTHER_PARENT`.
                 pub(crate) fn is_from_other_parent(&self) -> bool {
                     if self.state() != EntryState::Normal {
                         return false;
                     }
                     self.size() == SIZE_FROM_OTHER_PARENT
                 }
                 // TODO: other platforms
                 /// Returns whether the `0o100` mode bit differs between the mode
                 /// recorded in this entry and the mode in `filesystem_metadata`.
                 #[cfg(unix)]
                 pub fn mode_changed(
                     &self,
                     filesystem_metadata: &std::fs::Metadata,
                 ) -> bool {
                     use std::os::unix::fs::MetadataExt;
                     const EXEC_BIT_MASK: u32 = 0o100;
                     let recorded_mode = self.mode() as u32;
                     let on_disk_mode = filesystem_metadata.mode();
                     // The XOR keeps exactly the bits on which the two modes disagree.
                     ((recorded_mode ^ on_disk_mode) & EXEC_BIT_MASK) != 0
                 }
                 /// Returns a `(state, mode, size, mtime)` tuple as for
                 /// `DirstateMapMethods::debug_iter`.
                 pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
                     let state: u8 = self.state().into();
                     let mode = self.mode();
                     let size = self.size();
                     let mtime = self.mtime();
                     (state, mode, size, mtime)
                 }
                 /// True if the stored mtime would be ambiguous with the current time
                 ///
                 /// That is the case when an mtime is recorded, the entry is in
                 /// `Normal` state, and the truncated seconds of that mtime equal
                 /// those of `now`.
                 pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
                     match self.mtime {
                         None => false,
                         Some(recorded) => {
                             self.state() == EntryState::Normal
                                 && recorded.truncated_seconds()
                                     == now.truncated_seconds()
                         }
                     }
                 }
             }
             impl EntryState {
                 pub fn is_tracked(self) -> bool {
                     use EntryState::*;
                     match self {
                         Normal | Added | Merged => true,
                         Removed => false,
                     }
                 }
             }
             impl TryFrom<u8> for EntryState {
                 type Error = HgError;
                 /// Decodes the one-byte state marker (`n`, `a`, `r` or `m`).
                 ///
                 /// Any other byte yields `HgError::CorruptedRepository`.
                 fn try_from(value: u8) -> Result<Self, Self::Error> {
                     let state = match value {
                         b'n' => EntryState::Normal,
                         b'a' => EntryState::Added,
                         b'r' => EntryState::Removed,
                         b'm' => EntryState::Merged,
                         _ => {
                             return Err(HgError::CorruptedRepository(format!(
                                 "Incorrect dirstate entry state {}",
                                 value
                             )))
                         }
                     };
                     Ok(state)
                 }
             }
             /// Encodes a state as its one-byte marker (`n`, `a`, `r` or `m`).
             ///
             /// Implemented as `From` rather than `Into`: the standard library's
             /// blanket impl still provides `Into<u8> for EntryState`, so existing
             /// `.into()` calls keep working, and `u8::from(state)` becomes
             /// available as well.
             impl From<EntryState> for u8 {
                 fn from(state: EntryState) -> Self {
                     match state {
                         EntryState::Normal => b'n',
                         EntryState::Added => b'a',
                         EntryState::Removed => b'r',
                         EntryState::Merged => b'm',
                     }
                 }
             }

rust/hg-core/src/dirstate_tree/on_disk.rs

0 +1 -9

             //! The "version 2" disk representation of the dirstate
             //!
             //! See `mercurial/helptext/internals/dirstate-v2.txt`
             use crate::dirstate::TruncatedTimestamp;
             use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
             use crate::dirstate_tree::path_with_basename::WithBasename;
             use crate::errors::HgError;
             use crate::utils::hg_path::HgPath;
             use crate::DirstateEntry;
             use crate::DirstateError;
             use crate::DirstateParents;
             use bitflags::bitflags;
             use bytes_cast::unaligned::{U16Be, U32Be};
             use bytes_cast::BytesCast;
             use format_bytes::format_bytes;
             use std::borrow::Cow;
             use std::convert::{TryFrom, TryInto};
             /// Added at the start of `.hg/dirstate` when the "v2" format is used.
             /// This is a redundant sanity check more than an actual "magic number"
             /// since `.hg/requires` already governs which format should be used.
             pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
             /// Keep space for 256-bit hashes
             const STORED_NODE_ID_BYTES: usize = 32;
             /// … even though only 160 bits are used for now, with SHA-1
             const USED_NODE_ID_BYTES: usize = 20;
             /// Length in bytes of the stored hash of ignore patterns
             pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
             /// Raw bytes of the stored hash of ignore patterns
             pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
             /// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
             const TREE_METADATA_SIZE: usize = 44;
             // Also expected to match the constant of the same name in
             // `mercurial/dirstateutils/v2.py`.
             const NODE_SIZE: usize = 44;
             /// Make sure that size-affecting changes are made knowingly
             ///
             /// This function is never called. Naming `transmute` with explicit type
             /// parameters is meant to make compilation fail when a struct no longer
             /// has the size of the paired byte array.
             #[allow(unused)]
             fn static_assert_size_of() {
                 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
                 // 81 = 12 (marker) + 2 * 32 (parents) + 4 (data_size) + 1 (uuid_size)
                 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
                 let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
             }
             // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
             /// Fixed-size part at the start of the docket file. `read_docket`
             /// treats the bytes that follow it as the UUID of the data file.
             #[derive(BytesCast)]
             #[repr(C)]
             struct DocketHeader {
                 /// Checked against `V2_FORMAT_MARKER` by `read_docket`
                 marker: [u8; V2_FORMAT_MARKER.len()],
                 /// Only the first `USED_NODE_ID_BYTES` bytes are read
                 parent_1: [u8; STORED_NODE_ID_BYTES],
                 /// Only the first `USED_NODE_ID_BYTES` bytes are read
                 parent_2: [u8; STORED_NODE_ID_BYTES],
                 metadata: TreeMetadata,
                 /// Counted in bytes
                 data_size: Size,
                 /// Number of UUID bytes expected after this header
                 uuid_size: u8,
             }
             /// A parsed docket, borrowing from the bytes given to `read_docket`.
             pub struct Docket<'on_disk> {
                 header: &'on_disk DocketHeader,
                 /// Bytes following the header; used to build the data file name
                 uuid: &'on_disk [u8],
             }
             /// Fields are documented in the *Tree metadata in the docket file*
             /// section of `mercurial/helptext/internals/dirstate-v2.txt`
             ///
             /// `static_assert_size_of` ties the size of this struct to
             /// `TREE_METADATA_SIZE`.
             #[derive(BytesCast)]
             #[repr(C)]
             struct TreeMetadata {
                 root_nodes: ChildNodes,
                 nodes_with_entry_count: Size,
                 nodes_with_copy_source_count: Size,
                 unreachable_bytes: Size,
                 unused: [u8; 4],
                 /// See *Optional hash of ignore patterns* section of
                 /// `mercurial/helptext/internals/dirstate-v2.txt`
                 ignore_patterns_hash: IgnorePatternsHash,
             }
             /// Fields are documented in the *The data file format*
             /// section of `mercurial/helptext/internals/dirstate-v2.txt`
             ///
             /// `static_assert_size_of` ties the size of this struct to `NODE_SIZE`.
             #[derive(BytesCast)]
             #[repr(C)]
             pub(super) struct Node {
                 full_path: PathSlice,
                 /// In bytes from `self.full_path.start`
                 base_name_start: PathSize,
                 /// No copy source when `start == 0`
                 copy_source: OptPathSlice,
                 children: ChildNodes,
                 pub(super) descendants_with_entry_count: Size,
                 pub(super) tracked_descendants_count: Size,
                 /// Raw bits, decoded into `Flags` by `Node::flags`
                 flags: U16Be,
                 size: U32Be,
                 mtime: PackedTruncatedTimestamp,
             }
             // Bit flags stored in the `flags` field of an on-disk `Node`.
             // A node is considered to hold an entry when any of `WDIR_TRACKED`,
             // `P1_TRACKED` or `P2_INFO` is set (see `Node::has_entry`).
             bitflags! {
                 #[repr(C)]
                 struct Flags: u16 {
                     const WDIR_TRACKED = 1 << 0;
                     const P1_TRACKED = 1 << 1;
                     const P2_INFO = 1 << 2;
                     const HAS_MODE_AND_SIZE = 1 << 3;
                     const HAS_FILE_MTIME = 1 << 4;
                     const HAS_DIRECTORY_MTIME = 1 << 5;
                     const MODE_EXEC_PERM = 1 << 6;
                     const MODE_IS_SYMLINK = 1 << 7;
                     const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
                     const ALL_UNKNOWN_RECORDED = 1 << 9;
                     const ALL_IGNORED_RECORDED = 1 << 10;
                     const HAS_FALLBACK_EXEC = 1 << 11;
                     const FALLBACK_EXEC = 1 << 12;
                     const HAS_FALLBACK_SYMLINK = 1 << 13;
                     const FALLBACK_SYMLINK = 1 << 14;
                     const MTIME_SECOND_AMBIGUOUS = 1 << 15;
                 }
             }
             /// Duration since the Unix epoch
             ///
             /// On-disk form of a timestamp, as stored in the `mtime` field of `Node`.
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct PackedTruncatedTimestamp {
                 // NOTE(review): presumably the same truncated seconds and sub-second
                 // nanoseconds as in `TruncatedTimestamp` — confirm against the
                 // conversion code.
                 truncated_seconds: U32Be,
                 nanoseconds: U32Be,
             }
             /// Counted in bytes from the start of the file
             ///
             /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
             type Offset = U32Be;
             /// Counted in number of items
             ///
             /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
             ///
             /// Some fields of this type are documented as byte counts instead, such
             /// as `DocketHeader::data_size`.
             type Size = U32Be;
             /// Counted in bytes
             ///
             /// NOTE: we choose not to support file names/paths longer than 64 KiB.
             type PathSize = U16Be;
             /// A contiguous sequence of `len` times `Node`, representing the child nodes
             /// of either some other node or of the repository root.
             ///
             /// Always sorted by ascending `full_path`, to allow binary search.
             /// Since nodes with the same parent nodes also have the same parent path,
             /// only the `base_name`s need to be compared during binary search.
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct ChildNodes {
                 /// Where the first `Node` starts
                 start: Offset,
                 /// Number of `Node`s
                 len: Size,
             }
             /// A `HgPath` of `len` bytes
             #[derive(BytesCast, Copy, Clone)]
             #[repr(C)]
             struct PathSlice {
                 /// Where the path bytes start
                 start: Offset,
                 /// Length of the path in bytes
                 len: PathSize,
             }
             /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
             type OptPathSlice = PathSlice;
             /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
             ///
             /// This should only happen if Mercurial is buggy or a repository is corrupted.
             ///
             /// This is a unit struct: it carries no detail about what was malformed.
             #[derive(Debug)]
             pub struct DirstateV2ParseError;
             /// Turns any dirstate-v2 parse error into a "corrupted" `HgError` with a
             /// fixed message.
             impl From<DirstateV2ParseError> for HgError {
                 fn from(_: DirstateV2ParseError) -> Self {
                     HgError::corrupted("dirstate-v2 parse error")
                 }
             }
             /// Converts through `HgError`, so the resulting `DirstateError` wraps the
             /// same "corrupted" error.
             impl From<DirstateV2ParseError> for crate::DirstateError {
                 fn from(error: DirstateV2ParseError) -> Self {
                     HgError::from(error).into()
                 }
             }
             impl<'on_disk> Docket<'on_disk> {
                 /// Returns the two dirstate parents, built from the first
                 /// `USED_NODE_ID_BYTES` bytes of each stored node ID.
                 pub fn parents(&self) -> DirstateParents {
                     use crate::Node;
                     let stored_p1 = &self.header.parent_1[..USED_NODE_ID_BYTES];
                     let stored_p2 = &self.header.parent_2[..USED_NODE_ID_BYTES];
                     DirstateParents {
                         p1: Node::try_from(stored_p1).unwrap().clone(),
                         p2: Node::try_from(stored_p2).unwrap().clone(),
                     }
                 }
                 /// Returns the raw bytes of the tree metadata stored in the header.
                 pub fn tree_metadata(&self) -> &[u8] {
                     let metadata = &self.header.metadata;
                     metadata.as_bytes()
                 }
                 /// Returns the `data_size` header field, which is counted in bytes.
                 pub fn data_size(&self) -> usize {
                     let size = self.header.data_size.get();
                     // This `unwrap` could only panic on a 16-bit CPU
                     usize::try_from(size).unwrap()
                 }
                 /// Returns `dirstate.` followed by the UUID.
                 ///
                 /// Panics if the UUID bytes are not valid UTF-8.
                 pub fn data_filename(&self) -> String {
                     let name = format_bytes!(b"dirstate.{}", self.uuid);
                     String::from_utf8(name).unwrap()
                 }
             }
             /// Parses the docket file contents in `on_disk`.
             ///
             /// Fails if the bytes are too short for a `DocketHeader`, if the marker
             /// is not `V2_FORMAT_MARKER`, or if the number of bytes after the header
             /// differs from the `uuid_size` header field.
             pub fn read_docket(
                 on_disk: &[u8],
             ) -> Result<Docket<'_>, DirstateV2ParseError> {
                 let (header, uuid) =
                     DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
                 if header.marker != *V2_FORMAT_MARKER {
                     return Err(DirstateV2ParseError);
                 }
                 let expected_uuid_len = header.uuid_size as usize;
                 if uuid.len() != expected_uuid_len {
                     return Err(DirstateV2ParseError);
                 }
                 Ok(Docket { header, uuid })
             }
             /// Builds a `DirstateMap` that borrows from `on_disk`, using the tree
             /// metadata found in the `metadata` bytes.
             ///
             /// An empty `on_disk` yields an empty map without looking at `metadata`.
             pub(super) fn read<'on_disk>(
                 on_disk: &'on_disk [u8],
                 metadata: &[u8],
             ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
                 if on_disk.is_empty() {
                     return Ok(DirstateMap::empty(on_disk));
                 }
                 let (meta, _) = TreeMetadata::from_bytes(metadata)
                     .map_err(|_| DirstateV2ParseError)?;
                 let root_nodes = read_nodes(on_disk, meta.root_nodes)?;
                 Ok(DirstateMap {
                     on_disk,
                     root: dirstate_map::ChildNodes::OnDisk(root_nodes),
                     nodes_with_entry_count: meta.nodes_with_entry_count.get(),
                     nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
                     ignore_patterns_hash: meta.ignore_patterns_hash,
                     unreachable_bytes: meta.unreachable_bytes.get(),
                 })
             }
             impl Node {
                 /// Reads the full path of this node from `on_disk`, through
                 /// `read_hg_path`.
                 pub(super) fn full_path<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                     read_hg_path(on_disk, self.full_path)
                 }
                 /// Returns the position, within the full path, where the base name
                 /// starts.
                 ///
                 /// Fails if the stored position is not strictly less than the length
                 /// of the full path.
                 pub(super) fn base_name_start<'on_disk>(
                     &self,
                 ) -> Result<usize, DirstateV2ParseError> {
                     let start = self.base_name_start.get();
                     if start < self.full_path.len.get() {
                         // `PathSize` is 16 bits wide, so widening to `usize` cannot
                         // fail.
                         Ok(usize::from(start))
                     } else {
                         Err(DirstateV2ParseError)
                     }
                 }
                 /// Returns the part of the full path that starts at
                 /// `base_name_start`.
                 pub(super) fn base_name<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
                     let path_bytes = self.full_path(on_disk)?.as_bytes();
                     let start = self.base_name_start()?;
                     Ok(HgPath::new(&path_bytes[start..]))
                 }
                 /// Returns the in-memory key for this node: the borrowed full path
                 /// paired with the index at which its base name starts.
                 pub(super) fn path<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
                     let full_path = Cow::Borrowed(self.full_path(on_disk)?);
                     let base_name_start = self.base_name_start()?;
                     let key = WithBasename::from_raw_parts(full_path, base_name_start);
                     Ok(key)
                 }
                 /// Whether this node records a copy source: true when the start offset
                 /// of the `copy_source` slice is non-zero.
                 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
                     let source_start = self.copy_source.start.get();
                     source_start != 0
                 }
                 /// Returns the copy source path of this node, if any.
                 ///
                 /// Yields `Ok(None)` when `has_copy_source()` is false; otherwise reads
                 /// the path from `on_disk`, failing with `DirstateV2ParseError` if it
                 /// cannot be read.
                 pub(super) fn copy_source<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
                     if !self.has_copy_source() {
                         return Ok(None);
                     }
                     let source = read_hg_path(on_disk, self.copy_source)?;
                     Ok(Some(source))
                 }
                 /// Decodes the stored flag bits into `Flags`, dropping any bits that
                 /// `Flags` does not define (`from_bits_truncate`).
                 fn flags(&self) -> Flags {
                     let raw_bits = self.flags.get();
                     Flags::from_bits_truncate(raw_bits)
                 }
                 /// Whether this node carries a dirstate entry, i.e. at least one of
                 /// `WDIR_TRACKED`, `P1_TRACKED` or `P2_INFO` is set.
                 fn has_entry(&self) -> bool {
                     let entry_flags =
                         Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO;
                     self.flags().intersects(entry_flags)
                 }
                 /// Converts this node's payload to its in-memory representation.
                 ///
                 /// An entry takes precedence; otherwise a cached directory mtime is
                 /// used if one is available; otherwise the node has no data.
                 pub(super) fn node_data(
                     &self,
                 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
                     if self.has_entry() {
                         return self.assume_entry().map(dirstate_map::NodeData::Entry);
                     }
                     let data = match self.cached_directory_mtime()? {
                         Some(mtime) => dirstate_map::NodeData::CachedDirectory { mtime },
                         None => dirstate_map::NodeData::None,
                     };
                     Ok(data)
                 }
                 /// Returns the cached directory mtime stored in this node, if usable.
                 ///
                 /// `Ok(None)` is returned unless both `HAS_DIRECTORY_MTIME` and
                 /// `ALL_UNKNOWN_RECORDED` are set. A node that has those flags and also
                 /// `HAS_FILE_MTIME` is rejected with `DirstateV2ParseError`.
                 pub(super) fn cached_directory_mtime(
                     &self,
                 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
                     // For now we do not have code to handle the absence of
                     // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
                     // unset.
                     if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
                         && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
                     {
                         // The single `mtime` field cannot be both a directory mtime and
                         // a file mtime: treat that combination as corrupted data.
                         if self.flags().contains(Flags::HAS_FILE_MTIME) {
                             Err(DirstateV2ParseError)
                         } else {
                             Ok(Some(self.mtime.try_into()?))
                         }
                     } else {
                         Ok(None)
                     }
                 }
                 fn synthesize_unix_mode(&self) -> u32 {
                     let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
                         libc::S_IFLNK
                     } else {
                         libc::S_IFREG
                     };
                     let permisions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
 o755
                     } else {
 o644
                     };
                     file_type | permisions
                 }
                 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
                     // TODO: convert through raw bits instead?
                     let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
                     let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
                     let p2_info = self.flags().contains(Flags::P2_INFO);
                     let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
                         && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
                     {
                         Some((self.synthesize_unix_mode(), self.size.into()))
                     } else {
                         None
                     };
                     let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
                         && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
                         // The current code is not able to do the more subtle comparison that the
                         // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
                         && !self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS)
                     {
-                        // TODO: replace this by `self.mtime.try_into()?` to use
+                        Some(self.mtime.try_into()?)
-                        // sub-second precision from the file.
-                        // We don’t do this yet because other parts of the code
-                        // always set it to zero.
-                        let mtime = TruncatedTimestamp::from_already_truncated(
-                            self.mtime.truncated_seconds.get(),
-,
-                        )?;
-                        Some(mtime)
                     } else {
                         None
                     };
                     Ok(DirstateEntry::from_v2_data(
                         wdir_tracked,
                         p1_tracked,
                         p2_info,
                         mode_size,
                         mtime,
                         None,
                         None,
                     ))
                 }
                 /// Returns this node's dirstate entry, or `Ok(None)` when
                 /// `has_entry()` is false.
                 pub(super) fn entry(
                     &self,
                 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
                     if !self.has_entry() {
                         return Ok(None);
                     }
                     self.assume_entry().map(Some)
                 }
                 /// Returns the child nodes of this node, borrowed from `on_disk`.
                 pub(super) fn children<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
                     let child_slice = self.children;
                     read_nodes(on_disk, child_slice)
                 }
                 /// Converts this on-disk node to a `dirstate_map::Node` whose children
                 /// and copy source still borrow from `on_disk`.
                 pub(super) fn to_in_memory_node<'on_disk>(
                     &self,
                     on_disk: &'on_disk [u8],
                 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
                     let children =
                         dirstate_map::ChildNodes::OnDisk(self.children(on_disk)?);
                     let copy_source = self.copy_source(on_disk)?.map(Cow::Borrowed);
                     let data = self.node_data()?;
                     let descendants_with_entry_count =
                         self.descendants_with_entry_count.get();
                     let tracked_descendants_count = self.tracked_descendants_count.get();
                     Ok(dirstate_map::Node {
                         children,
                         copy_source,
                         data,
                         descendants_with_entry_count,
                         tracked_descendants_count,
                     })
                 }
                 /// Computes the on-disk `(flags, size, mtime)` triple for `entry`.
                 ///
                 /// * Mode and size: when present, `HAS_MODE_AND_SIZE` is set together
                 ///   with `MODE_EXEC_PERM` / `MODE_IS_SYMLINK` derived from the mode;
                 ///   when absent the size is written as zero.
                 /// * Mtime: when present, `HAS_FILE_MTIME` is set; when absent the null
                 ///   timestamp is written.
                 /// * Fallback exec/symlink: when present, the matching `HAS_FALLBACK_*`
                 ///   flag is set, plus `FALLBACK_*` if the value is true.
                 fn from_dirstate_entry(
                     entry: &DirstateEntry,
                 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
                     let (
                         wdir_tracked,
                         p1_tracked,
                         p2_info,
                         mode_size_opt,
                         mtime_opt,
                         fallback_exec,
                         fallback_symlink,
                     ) = entry.v2_data();
                     // TODO: convert through raw flag bits instead?
                     let mut flags = Flags::empty();
                     flags.set(Flags::WDIR_TRACKED, wdir_tracked);
                     flags.set(Flags::P1_TRACKED, p1_tracked);
                     flags.set(Flags::P2_INFO, p2_info);
                     let size = if let Some((m, s)) = mode_size_opt {
                         let exec_perm = m & libc::S_IXUSR != 0;
                         let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
                         flags.set(Flags::MODE_EXEC_PERM, exec_perm);
                         flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
                         flags.insert(Flags::HAS_MODE_AND_SIZE);
                         s.into()
                     } else {
                         // No mode/size information: store a zero size.
                         0.into()
                     };
                     let mtime = if let Some(m) = mtime_opt {
                         flags.insert(Flags::HAS_FILE_MTIME);
                         m.into()
                     } else {
                         PackedTruncatedTimestamp::null()
                     };
                     if let Some(f_exec) = fallback_exec {
                         flags.insert(Flags::HAS_FALLBACK_EXEC);
                         if f_exec {
                             flags.insert(Flags::FALLBACK_EXEC);
                         }
                     }
                     if let Some(f_symlink) = fallback_symlink {
                         flags.insert(Flags::HAS_FALLBACK_SYMLINK);
                         if f_symlink {
                             flags.insert(Flags::FALLBACK_SYMLINK);
                         }
                     }
                     (flags, size, mtime)
                 }
             }
             /// Reads the bytes designated by `slice` from `on_disk` and views them as
             /// an `HgPath`.
             fn read_hg_path(
                 on_disk: &[u8],
                 slice: PathSlice,
             ) -> Result<&HgPath, DirstateV2ParseError> {
                 let path_bytes: &[u8] =
                     read_slice(on_disk, slice.start, slice.len.get())?;
                 Ok(HgPath::new(path_bytes))
             }
             /// Reads the run of `Node`s designated by `slice` from `on_disk`.
             fn read_nodes(
                 on_disk: &[u8],
                 slice: ChildNodes,
             ) -> Result<&[Node], DirstateV2ParseError> {
                 let node_count = slice.len.get();
                 read_slice(on_disk, slice.start, node_count)
             }
             /// Reads `len` items of type `T` from `on_disk`, starting at byte offset
             /// `start`.
             ///
             /// Fails with `DirstateV2ParseError` when `start` is past the end of
             /// `on_disk` or when `T::slice_from_bytes` rejects the remaining bytes.
             fn read_slice<T, Len>(
                 on_disk: &[u8],
                 start: Offset,
                 len: Len,
             ) -> Result<&[T], DirstateV2ParseError>
             where
                 T: BytesCast,
                 Len: TryInto<usize>,
             {
                 // A value that does not fit in `usize` is replaced by `usize::MAX`,
                 // which always ends up "out of bounds" since a single `&[u8]` cannot
                 // occupy the entire address space.
                 let byte_offset = start.get().try_into().unwrap_or(std::usize::MAX);
                 let item_count = len.try_into().unwrap_or(std::usize::MAX);
                 let tail = match on_disk.get(byte_offset..) {
                     Some(bytes) => bytes,
                     None => return Err(DirstateV2ParseError),
                 };
                 match T::slice_from_bytes(tail, item_count) {
                     Ok((items, _rest)) => Ok(items),
                     Err(_) => Err(DirstateV2ParseError),
                 }
             }
             /// Calls `f` with the full path of every node, at any depth of the tree
             /// described by `metadata`, whose entry is in a tracked state.
             ///
             /// Nodes are visited depth-first: a node is reported before its
             /// children. Any parse failure aborts the walk with
             /// `DirstateV2ParseError`.
             pub(crate) fn for_each_tracked_path<'on_disk>(
                 on_disk: &'on_disk [u8],
                 metadata: &[u8],
                 mut f: impl FnMut(&'on_disk HgPath),
             ) -> Result<(), DirstateV2ParseError> {
                 /// Visits `nodes` and, recursively, all of their descendants.
                 fn visit<'on_disk>(
                     on_disk: &'on_disk [u8],
                     nodes: ChildNodes,
                     callback: &mut impl FnMut(&'on_disk HgPath),
                 ) -> Result<(), DirstateV2ParseError> {
                     for node in read_nodes(on_disk, nodes)? {
                         let is_tracked = match node.entry()? {
                             Some(entry) => entry.state().is_tracked(),
                             None => false,
                         };
                         if is_tracked {
                             callback(node.full_path(on_disk)?);
                         }
                         visit(on_disk, node.children, callback)?;
                     }
                     Ok(())
                 }

                 let (meta, _) = TreeMetadata::from_bytes(metadata)
                     .map_err(|_| DirstateV2ParseError)?;
                 visit(on_disk, meta.root_nodes, &mut f)
             }
             /// Serializes `dirstate_map` and returns `(data, metadata, append)`.
             ///
             /// `append` tells the caller whether `data` must be appended to the
             /// existing data file whose content is at `dirstate_map.on_disk` (true),
             /// or written to a new data file (false). Appending only happens when
             /// `can_append` is true and the map itself says it should append.
             pub(super) fn write(
                 dirstate_map: &mut DirstateMap,
                 can_append: bool,
             ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
                 let append = can_append && dirstate_map.write_should_append();

                 // This ignores the space for paths, and for nodes without an entry.
                 // TODO: better estimate? Skip the `Vec` and write to a file directly?
                 let node_size = std::mem::size_of::<Node>();
                 let capacity_hint =
                     node_size * dirstate_map.nodes_with_entry_count as usize;

                 let mut writer = Writer {
                     dirstate_map,
                     append,
                     out: Vec::with_capacity(capacity_hint),
                 };

                 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
                 let meta = TreeMetadata {
                     root_nodes,
                     nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
                     nodes_with_copy_source_count: dirstate_map
                         .nodes_with_copy_source_count
                         .into(),
                     unreachable_bytes: dirstate_map.unreachable_bytes.into(),
                     unused: [0; 4],
                     ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
                 };
                 let metadata_bytes = meta.as_bytes().to_vec();
                 Ok((writer.out, metadata_bytes, append))
             }
             /// State shared by the functions that serialize a `DirstateMap`.
             struct Writer<'dmap, 'on_disk> {
                 /// The map being serialized; its `on_disk` bytes are also used to
                 /// find nodes and paths that are already written.
                 dirstate_map: &'dmap DirstateMap<'on_disk>,
                 /// Whether `out` will be appended to the existing data. When true,
                 /// already-written nodes and paths are reused, and new offsets are
                 /// shifted by the length of the existing data.
                 append: bool,
                 /// The newly serialized bytes.
                 out: Vec<u8>,
             }
             impl Writer<'_, '_> {
                 fn write_nodes(
                     &mut self,
                     nodes: dirstate_map::ChildNodesRef,
                 ) -> Result<ChildNodes, DirstateError> {
                     // Reuse already-written nodes if possible
                     if self.append {
                         if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                             let start = self.on_disk_offset_of(nodes_slice).expect(
                                 "dirstate-v2 OnDisk nodes not found within on_disk",
                             );
                             let len = child_nodes_len_from_usize(nodes_slice.len());
                             return Ok(ChildNodes { start, len });
                         }
                     }
                     // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
                     // undefined iteration order. Sort to enable binary search in the
                     // written file.
                     let nodes = nodes.sorted();
                     let nodes_len = nodes.len();
                     // First accumulate serialized nodes in a `Vec`
                     let mut on_disk_nodes = Vec::with_capacity(nodes_len);
                     for node in nodes {
                         let children =
                             self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
                         let full_path = node.full_path(self.dirstate_map.on_disk)?;
                         let full_path = self.write_path(full_path.as_bytes());
                         let copy_source = if let Some(source) =
                             node.copy_source(self.dirstate_map.on_disk)?
                         {
                             self.write_path(source.as_bytes())
                         } else {
                             PathSlice {
                                 start: 0.into(),
                                 len: 0.into(),
                             }
                         };
                         on_disk_nodes.push(match node {
                             NodeRef::InMemory(path, node) => {
                                 let (flags, size, mtime) = match &node.data {
                                     dirstate_map::NodeData::Entry(entry) => {
                                         Node::from_dirstate_entry(entry)
                                     }
                                     dirstate_map::NodeData::CachedDirectory { mtime } => (
                                         // we currently never set a mtime if unknown file
                                         // are present.
                                         // So if we have a mtime for a directory, we know
                                         // they are no unknown
                                         // files and we
                                         // blindly set ALL_UNKNOWN_RECORDED.
                                         //
                                         // We never set ALL_IGNORED_RECORDED since we
                                         // don't track that case
                                         // currently.
                                         Flags::HAS_DIRECTORY_MTIME
                                             | Flags::ALL_UNKNOWN_RECORDED,
 .into(),
                                         (*mtime).into(),
                                     ),
                                     dirstate_map::NodeData::None => (
                                         Flags::empty(),
 .into(),
                                         PackedTruncatedTimestamp::null(),
                                     ),
                                 };
                                 Node {
                                     children,
                                     copy_source,
                                     full_path,
                                     base_name_start: u16::try_from(path.base_name_start())
                                         // Could only panic for paths over 64 KiB
                                         .expect("dirstate-v2 path length overflow")
                                         .into(),
                                     descendants_with_entry_count: node
                                         .descendants_with_entry_count
                                         .into(),
                                     tracked_descendants_count: node
                                         .tracked_descendants_count
                                         .into(),
                                     flags: flags.bits().into(),
                                     size,
                                     mtime,
                                 }
                             }
                             NodeRef::OnDisk(node) => Node {
                                 children,
                                 copy_source,
                                 full_path,
                                 ..*node
                             },
                         })
                     }
                     // … so we can write them contiguously, after writing everything else
                     // they refer to.
                     let start = self.current_offset();
                     let len = child_nodes_len_from_usize(nodes_len);
                     self.out.extend(on_disk_nodes.as_bytes());
                     Ok(ChildNodes { start, len })
                 }
                 /// Returns the offset of `slice` from the start of
                 /// `dirstate_map.on_disk` when both ends of `slice` fall within the
                 /// address range of `on_disk`, and `None` otherwise.
                 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
                 where
                     T: BytesCast,
                 {
                     /// Inclusive range from the first byte's address to one past the
                     /// last byte's address.
                     fn address_range(bytes: &[u8]) -> std::ops::RangeInclusive<usize> {
                         let first = bytes.as_ptr() as usize;
                         first..=(first + bytes.len())
                     }

                     let candidate = address_range(slice.as_bytes());
                     let whole = address_range(self.dirstate_map.on_disk);
                     let is_inside = whole.contains(candidate.start())
                         && whole.contains(candidate.end());
                     if !is_inside {
                         return None;
                     }
                     let offset = candidate.start() - whole.start();
                     Some(offset_from_usize(offset))
                 }
                 /// Returns the offset at which the next byte pushed to `self.out` will
                 /// end up in the data file: the length of `out`, plus the length of the
                 /// existing `on_disk` data when appending.
                 fn current_offset(&mut self) -> Offset {
                     let written = self.out.len();
                     let position = if self.append {
                         written + self.dirstate_map.on_disk.len()
                     } else {
                         written
                     };
                     offset_from_usize(position)
                 }
                 /// Makes the bytes of a path available in the data file and returns
                 /// their location.
                 ///
                 /// In append mode, a path that already lives in `on_disk` is reused;
                 /// otherwise the bytes are pushed to `self.out`.
                 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
                     let len = path_len_from_usize(slice.len());
                     // Reuse an already-written path if possible
                     let reused = if self.append {
                         self.on_disk_offset_of(slice)
                     } else {
                         None
                     };
                     let start = match reused {
                         Some(existing) => existing,
                         None => {
                             let fresh = self.current_offset();
                             self.out.extend(slice.as_bytes());
                             fresh
                         }
                     };
                     PathSlice { start, len }
                 }
             }
             /// Converts a byte offset to its on-disk representation.
             ///
             /// Panics if `x` does not fit in a `u32`.
             fn offset_from_usize(x: usize) -> Offset {
                 // Could only panic for a dirstate file larger than 4 GiB
                 let narrowed = u32::try_from(x).expect("dirstate-v2 offset overflow");
                 narrowed.into()
             }
             /// Converts a number of child nodes to its on-disk representation.
             ///
             /// Panics if `x` does not fit in a `u32`.
             fn child_nodes_len_from_usize(x: usize) -> Size {
                 // Could only panic with over 4 billion nodes
                 let narrowed =
                     u32::try_from(x).expect("dirstate-v2 slice length overflow");
                 narrowed.into()
             }
             /// Converts a path length to its on-disk representation.
             ///
             /// Panics if `x` does not fit in a `u16`.
             fn path_len_from_usize(x: usize) -> PathSize {
                 // Could only panic for paths over 64 KiB
                 let narrowed =
                     u16::try_from(x).expect("dirstate-v2 path length overflow");
                 narrowed.into()
             }
             /// Packs an in-memory timestamp into its on-disk form, keeping both the
             /// truncated seconds and the nanoseconds.
             impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
                 fn from(timestamp: TruncatedTimestamp) -> Self {
                     let seconds = timestamp.truncated_seconds();
                     let nanos = timestamp.nanoseconds();
                     Self {
                         truncated_seconds: seconds.into(),
                         nanoseconds: nanos.into(),
                     }
                 }
             }
             /// Unpacks an on-disk timestamp. Validation is delegated to
             /// `TruncatedTimestamp::from_already_truncated`.
             impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
                 type Error = DirstateV2ParseError;

                 fn try_from(
                     timestamp: PackedTruncatedTimestamp,
                 ) -> Result<Self, Self::Error> {
                     let seconds = timestamp.truncated_seconds.get();
                     let nanos = timestamp.nanoseconds.get();
                     Self::from_already_truncated(seconds, nanos)
                 }
             }
             impl PackedTruncatedTimestamp {
                 /// The all-zero timestamp, written when a node has no mtime to store.
                 fn null() -> Self {
                     Self {
                         nanoseconds: 0.into(),
                         truncated_seconds: 0.into(),
                     }
                 }
             }

rust/hg-core/src/dirstate_tree/status.rs

0 +1 -3

             use crate::dirstate::entry::TruncatedTimestamp;
             use crate::dirstate::status::IgnoreFnType;
             use crate::dirstate_tree::dirstate_map::BorrowedPath;
             use crate::dirstate_tree::dirstate_map::ChildNodesRef;
             use crate::dirstate_tree::dirstate_map::DirstateMap;
             use crate::dirstate_tree::dirstate_map::NodeData;
             use crate::dirstate_tree::dirstate_map::NodeRef;
             use crate::dirstate_tree::on_disk::DirstateV2ParseError;
             use crate::matchers::get_ignore_function;
             use crate::matchers::Matcher;
             use crate::utils::files::get_bytes_from_os_string;
             use crate::utils::files::get_path_from_bytes;
             use crate::utils::hg_path::HgPath;
             use crate::BadMatch;
             use crate::DirstateStatus;
             use crate::EntryState;
             use crate::HgPathBuf;
             use crate::PatternFileWarning;
             use crate::StatusError;
             use crate::StatusOptions;
             use micro_timer::timed;
             use rayon::prelude::*;
             use sha1::{Digest, Sha1};
             use std::borrow::Cow;
             use std::io;
             use std::path::Path;
             use std::path::PathBuf;
             use std::sync::Mutex;
             use std::time::SystemTime;
             /// Returns the status of the working directory compared to its parent
             /// changeset.
             ///
             /// This algorithm is based on traversing the filesystem tree (`fs` in function
             /// and variable names) and dirstate tree at the same time. The core of this
             /// traversal is the recursive `traverse_fs_directory_and_dirstate` function
             /// and its use of `itertools::merge_join_by`. When reaching a path that only
             /// exists in one of the two trees, depending on information requested by
             /// `options` we may need to traverse the remaining subtree.
             ///
             /// Besides computing the outcome, this updates `dmap`: the hash of the
             /// ignore patterns is refreshed (when ignore files are read), outdated
             /// cached directory mtimes are removed and newly cachable ones are stored.
             /// `outcome.dirty` tells whether any of this changed.
             #[timed]
             pub fn status<'tree, 'on_disk: 'tree>(
                 dmap: &'tree mut DirstateMap<'on_disk>,
                 matcher: &(dyn Matcher + Sync),
                 root_dir: PathBuf,
                 ignore_files: Vec<PathBuf>,
                 options: StatusOptions,
             ) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
                 // Ignore files are only read when unknown or ignored files are
                 // listed. In that case the patterns are hashed while being read, and
                 // the hash is compared with the one stored in the dirstate.
                 let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) =
                     if options.list_ignored || options.list_unknown {
                         let mut hasher = Sha1::new();
                         let (ignore_fn, warnings) = get_ignore_function(
                             ignore_files,
                             &root_dir,
                             &mut |pattern_bytes| hasher.update(pattern_bytes),
                         )?;
                         let new_hash = *hasher.finalize().as_ref();
                         let changed = new_hash != dmap.ignore_patterns_hash;
                         dmap.ignore_patterns_hash = new_hash;
                         (ignore_fn, warnings, Some(changed))
                     } else {
                         // Placeholder function that returns `true` for every path;
                         // `None` records that we do not know whether patterns changed.
                         (Box::new(|&_| true), vec![], None)
                     };
                 let common = StatusCommon {
                     dmap,
                     options,
                     matcher,
                     ignore_fn,
                     outcome: Default::default(),
                     ignore_patterns_have_changed: patterns_changed,
                     new_cachable_directories: Default::default(),
                     outated_cached_directories: Default::default(),
                     filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
                 };
                 let is_at_repo_root = true;
                 let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
                 let has_ignored_ancestor = false;
                 let root_cached_mtime = None;
                 let root_dir_metadata = None;
                 // If the path we have for the repository root is a symlink, do follow it.
                 // (As opposed to symlinks within the working directory which are not
                 // followed, using `std::fs::symlink_metadata`.)
                 common.traverse_fs_directory_and_dirstate(
                     has_ignored_ancestor,
                     dmap.root.as_ref(),
                     hg_path,
                     &root_dir,
                     root_dir_metadata,
                     root_cached_mtime,
                     is_at_repo_root,
                 )?;
                 // The traversal is over: take the accumulated results out of their
                 // mutexes.
                 let mut outcome = common.outcome.into_inner().unwrap();
                 let new_cachable = common.new_cachable_directories.into_inner().unwrap();
                 let outdated = common.outated_cached_directories.into_inner().unwrap();
                 // The dirstate is dirty if the ignore patterns hash changed or if any
                 // cached directory mtime is about to be removed or added below.
                 outcome.dirty = common.ignore_patterns_have_changed == Some(true)
                     || !outdated.is_empty()
                     || !new_cachable.is_empty();
                 // Remove outdated mtimes before adding new mtimes, in case a given
                 // directory is both
                 for path in &outdated {
                     let node = dmap.get_or_insert(path)?;
                     if let NodeData::CachedDirectory { .. } = &node.data {
                         node.data = NodeData::None
                     }
                 }
                 for (path, mtime) in &new_cachable {
                     let node = dmap.get_or_insert(path)?;
                     match &node.data {
                         NodeData::Entry(_) => {} // Don’t overwrite an entry
                         NodeData::CachedDirectory { .. } | NodeData::None => {
                             node.data = NodeData::CachedDirectory { mtime: *mtime }
                         }
                     }
                 }
                 Ok((outcome, warnings))
             }
             /// Bag of random things needed by various parts of the algorithm. Reduces the
             /// number of parameters passed to functions.
             ///
             /// The fields that accumulate results are wrapped in a `Mutex` so that
             /// they can be updated through a shared reference.
             struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
                 /// The dirstate being compared with the filesystem.
                 dmap: &'tree DirstateMap<'on_disk>,
                 /// What the caller asked to be listed.
                 options: StatusOptions,
                 matcher: &'a (dyn Matcher + Sync),
                 ignore_fn: IgnoreFnType<'a>,
                 /// The status being built.
                 outcome: Mutex<DirstateStatus<'on_disk>>,
                 /// Directories whose mtime should be cached in the dirstate once the
                 /// traversal is over, with the mtime to store.
                 new_cachable_directories:
                     Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
                 /// Directories whose cached mtime should be removed from the dirstate
                 /// once the traversal is over.
                 outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
                 /// Whether ignore files like `.hgignore` have changed since the previous
                 /// time a `status()` call wrote their hash to the dirstate. `None` means
                 /// we don’t know as this run doesn’t list either ignored or unknown files
                 /// and therefore isn’t reading `.hgignore`.
                 ignore_patterns_have_changed: Option<bool>,
                 /// The current time at the start of the `status()` algorithm, as measured
                 /// and possibly truncated by the filesystem.
                 filesystem_time_at_status_start: Option<SystemTime>,
             }
             impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
                 /// Lists the entries of the directory at `fs_path`.
                 ///
                 /// On failure the I/O error is recorded for `hg_path` through
                 /// `io_error` and `Err(())` is returned.
                 fn read_dir(
                     &self,
                     hg_path: &HgPath,
                     fs_path: &Path,
                     is_at_repo_root: bool,
                 ) -> Result<Vec<DirEntry>, ()> {
                     match DirEntry::read_dir(fs_path, is_at_repo_root) {
                         Ok(entries) => Ok(entries),
                         Err(error) => {
                             self.io_error(error, hg_path);
                             Err(())
                         }
                     }
                 }
                 /// Records `error` as a "bad" match for `hg_path` in the outcome.
                 ///
                 /// Panics if `error` carries no raw OS error code, or if the outcome
                 /// mutex is poisoned.
                 fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
                     let errno = error.raw_os_error().expect("expected real OS error");
                     let mut outcome = self.outcome.lock().unwrap();
                     let bad_match = BadMatch::OsError(errno);
                     outcome.bad.push((hg_path.to_owned().into(), bad_match))
                 }
                 /// Schedules the removal of this node's cached directory mtime when
                 /// the ignore patterns are known to have changed.
                 ///
                 /// Nothing happens unless `ignore_patterns_have_changed` is
                 /// `Some(true)` and the node has a cached directory mtime.
                 fn check_for_outdated_directory_cache(
                     &self,
                     dirstate_node: &NodeRef<'tree, 'on_disk>,
                 ) -> Result<(), DirstateV2ParseError> {
                     if self.ignore_patterns_have_changed != Some(true) {
                         return Ok(());
                     }
                     if dirstate_node.cached_directory_mtime()?.is_none() {
                         return Ok(());
                     }
                     let mut outdated = self.outated_cached_directories.lock().unwrap();
                     let path = dirstate_node
                         .full_path_borrowed(self.dmap.on_disk)?
                         .detach_from_tree();
                     outdated.push(path);
                     Ok(())
                 }
                 /// Tells whether `read_dir` can be avoided for a directory.
                 ///
                 /// When this returns true, accurate results are obtained by calling
                 /// `symlink_metadata` only for the child nodes that exist in the
                 /// dirstate.
                 fn can_skip_fs_readdir(
                     &self,
                     directory_metadata: Option<&std::fs::Metadata>,
                     cached_directory_mtime: Option<TruncatedTimestamp>,
                 ) -> bool {
                     let options = &self.options;
                     if !(options.list_unknown || options.list_ignored) {
                         // Every state we are asked to list has a corresponding
                         // dirstate entry, as with `hg status -mard` for example.
                         return true;
                     }
                     if options.list_ignored
                         || self.ignore_patterns_have_changed != Some(false)
                     {
                         return false;
                     }
                     match (cached_directory_mtime, directory_metadata) {
                         // A previous run of `status` found this directory eligible
                         // for `read_dir` caching and stored its mtime. If that
                         // mtime has not changed since, the results of `read_dir`
                         // should be unchanged too.
                         (Some(cached_mtime), Some(meta)) => cached_mtime
                             .likely_equal_to_mtime_of(meta)
                             .unwrap_or(false),
                         _ => false,
                     }
                 }
                 /// Traverses one directory, pairing its dirstate child nodes with the
                 /// entries found on the filesystem.
                 ///
                 /// Returns whether all child entries of the filesystem directory have a
                 /// corresponding dirstate node or are ignored. When `read_dir` is
                 /// skipped this is unknown and `false` is returned.
                 fn traverse_fs_directory_and_dirstate(
                     &self,
                     has_ignored_ancestor: bool,
                     dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
                     directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
                     directory_fs_path: &Path,
                     directory_metadata: Option<&std::fs::Metadata>,
                     cached_directory_mtime: Option<TruncatedTimestamp>,
                     is_at_repo_root: bool,
                 ) -> Result<bool, DirstateV2ParseError> {
                     // Fast path: only look at the children known to the dirstate,
                     // with one `symlink_metadata` call each instead of `read_dir`.
                     if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
                     {
                         dirstate_nodes
                             .par_iter()
                             .map(|dirstate_node| {
                                 let fs_path = directory_fs_path.join(get_path_from_bytes(
                                     dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
                                 ));
                                 match std::fs::symlink_metadata(&fs_path) {
                                     Ok(fs_metadata) => self.traverse_fs_and_dirstate(
                                         &fs_path,
                                         &fs_metadata,
                                         dirstate_node,
                                         has_ignored_ancestor,
                                     ),
                                     // Nothing on disk at that path: only the dirstate
                                     // side remains to be visited.
                                     Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                                         self.traverse_dirstate_only(dirstate_node)
                                     }
                                     // Any other error is recorded in the outcome and
                                     // does not abort the traversal.
                                     Err(error) => {
                                         let hg_path =
                                             dirstate_node.full_path(self.dmap.on_disk)?;
                                         Ok(self.io_error(error, hg_path))
                                     }
                                 }
                             })
                             .collect::<Result<_, _>>()?;
                         // We don’t know, so conservatively say this isn’t the case
                         let children_all_have_dirstate_node_or_are_ignored = false;
                         return Ok(children_all_have_dirstate_node_or_are_ignored);
                     }
                     let mut fs_entries = if let Ok(entries) = self.read_dir(
                         directory_hg_path,
                         directory_fs_path,
                         is_at_repo_root,
                     ) {
                         entries
                     } else {
                         // Treat an unreadable directory (typically because of insufficient
                         // permissions) like an empty directory. `self.read_dir` has
                         // already called `self.io_error` so a warning will be emitted.
                         Vec::new()
                     };
                     // `merge_join_by` requires both its input iterators to be sorted:
                     let dirstate_nodes = dirstate_nodes.sorted();
                     // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
                     // https://github.com/rust-lang/rust/issues/34162
                     fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
                     // Propagate here any error that would happen inside the comparison
                     // callback below
                     for dirstate_node in &dirstate_nodes {
                         dirstate_node.base_name(self.dmap.on_disk)?;
                     }
                     // Pair up dirstate nodes and filesystem entries by base name.
                     itertools::merge_join_by(
                         dirstate_nodes,
                         &fs_entries,
                         |dirstate_node, fs_entry| {
                             // This `unwrap` never panics because we already propagated
                             // those errors above
                             dirstate_node
                                 .base_name(self.dmap.on_disk)
                                 .unwrap()
                                 .cmp(&fs_entry.base_name)
                         },
                     )
                     .par_bridge()
                     .map(|pair| {
                         use itertools::EitherOrBoth::*;
                         let has_dirstate_node_or_is_ignored;
                         match pair {
                             // Present both in the dirstate and on the filesystem
                             Both(dirstate_node, fs_entry) => {
                                 self.traverse_fs_and_dirstate(
                                     &fs_entry.full_path,
                                     &fs_entry.metadata,
                                     dirstate_node,
                                     has_ignored_ancestor,
                                 )?;
                                 has_dirstate_node_or_is_ignored = true
                             }
                             // Present in the dirstate only
                             Left(dirstate_node) => {
                                 self.traverse_dirstate_only(dirstate_node)?;
                                 has_dirstate_node_or_is_ignored = true;
                             }
                             // Present on the filesystem only: `traverse_fs_only`
                             // returns whether that path is ignored
                             Right(fs_entry) => {
                                 has_dirstate_node_or_is_ignored = self.traverse_fs_only(
                                     has_ignored_ancestor,
                                     directory_hg_path,
                                     fs_entry,
                                 )
                             }
                         }
                         Ok(has_dirstate_node_or_is_ignored)
                     })
                     // The result is true only if it is true for every child
                     .try_reduce(|| true, |a, b| Ok(a && b))
                 }
                 /// Visits a path that has both a dirstate node and a filesystem
                 /// entry, whose metadata is `fs_metadata`.
                 fn traverse_fs_and_dirstate(
                     &self,
                     fs_path: &Path,
                     fs_metadata: &std::fs::Metadata,
                     dirstate_node: NodeRef<'tree, 'on_disk>,
                     has_ignored_ancestor: bool,
                 ) -> Result<(), DirstateV2ParseError> {
                     self.check_for_outdated_directory_cache(&dirstate_node)?;
                     let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
                     let file_type = fs_metadata.file_type();
                     let is_file_like = file_type.is_file() || file_type.is_symlink();
                     if !is_file_like {
                         // A file previously tracked here was removed (with `hg rm`
                         // or similar) or deleted before a directory or something
                         // else took its place.
                         self.mark_removed_or_deleted_if_file(
                             hg_path,
                             dirstate_node.state()?,
                         );
                     }
                     if file_type.is_dir() {
                         if self.options.collect_traversed_dirs {
                             let traversed_dir = hg_path.detach_from_tree();
                             self.outcome.lock().unwrap().traversed.push(traversed_dir)
                         }
                         let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
                         let child_nodes = dirstate_node.children(self.dmap.on_disk)?;
                         let cached_mtime = dirstate_node.cached_directory_mtime()?;
                         let is_at_repo_root = false;
                         let children_all_have_dirstate_node_or_are_ignored = self
                             .traverse_fs_directory_and_dirstate(
                                 is_ignored,
                                 child_nodes,
                                 hg_path,
                                 fs_path,
                                 Some(fs_metadata),
                                 cached_mtime,
                                 is_at_repo_root,
                             )?;
                         return self.maybe_save_directory_mtime(
                             children_all_have_dirstate_node_or_are_ignored,
                             fs_metadata,
                             dirstate_node,
                         );
                     }
                     if is_file_like && self.matcher.matches(hg_path) {
                         match dirstate_node.state()? {
                             Some(EntryState::Added) => {
                                 let path = hg_path.detach_from_tree();
                                 self.outcome.lock().unwrap().added.push(path);
                             }
                             Some(EntryState::Removed) => {
                                 let path = hg_path.detach_from_tree();
                                 self.outcome.lock().unwrap().removed.push(path);
                             }
                             Some(EntryState::Merged) => {
                                 let path = hg_path.detach_from_tree();
                                 self.outcome.lock().unwrap().modified.push(path);
                             }
                             Some(EntryState::Normal) => {
                                 self.handle_normal_file(&dirstate_node, fs_metadata)?;
                             }
                             None => {
                                 // A node without state is a "directory" node, but
                                 // the filesystem has a file here
                                 self.mark_unknown_or_ignored(
                                     has_ignored_ancestor,
                                     hg_path,
                                 );
                             }
                         }
                     }
                     // Nothing below this path can exist on the filesystem since it is
                     // not a directory.
                     for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
                     {
                         self.traverse_dirstate_only(child_node)?
                     }
                     Ok(())
                 }
                 /// Queues the mtime of a directory for caching when it is safe to
                 /// reuse the results of `read_dir` in a later run.
                 fn maybe_save_directory_mtime(
                     &self,
                     children_all_have_dirstate_node_or_are_ignored: bool,
                     directory_metadata: &std::fs::Metadata,
                     dirstate_node: NodeRef<'tree, 'on_disk>,
                 ) -> Result<(), DirstateV2ParseError> {
                     // Caching is only possible when all filesystem directory entries
                     // from `read_dir` have a corresponding node in the dirstate, so
                     // that the names of those entries can be reconstituted without
                     // calling `read_dir` again.
                     if !children_all_have_dirstate_node_or_are_ignored {
                         return Ok(());
                     }
                     let (status_start, directory_mtime) = match (
                         &self.filesystem_time_at_status_start,
                         directory_metadata.modified(),
                     ) {
                         (Some(start), Ok(mtime)) => (start, mtime),
                         _ => return Ok(()),
                     };
                     // Although the Rust standard library’s `SystemTime` type has
                     // nanosecond precision, the times reported for a directory’s (or
                     // file’s) modified time may have lower resolution based on the
                     // filesystem (for example ext3 only stores integer seconds),
                     // kernel (see https://stackoverflow.com/a/14393315/1162888), etc.
                     if &directory_mtime >= status_start {
                         // The directory was modified too recently, don’t cache its
                         // `read_dir` results.
                         //
                         // A timeline like this is possible:
                         //
                         // 1. A change to this directory (direct child was added or
                         //    removed) causes its mtime to be set (possibly
                         //    truncated) to `directory_mtime`
                         // 2. This `status` algorithm calls `read_dir`
                         // 3. Another change is made to the same directory so that
                         //    calling `read_dir` again would give different results,
                         //    but soon enough after 1. that the mtime stays the same
                         //
                         // On a system where the time resolution is poor, this
                         // scenario is not unlikely if all three steps are caused by
                         // the same script.
                         return Ok(());
                     }
                     // We’ve observed (through `status_start`) that time has
                     // “progressed” since `directory_mtime`, so any further change to
                     // this directory is extremely likely to cause a different mtime.
                     //
                     // Having the same mtime again is not entirely impossible since the
                     // system clock is not monotonic. It could jump backward to some
                     // point before `directory_mtime`, then a directory change could
                     // potentially happen during exactly the wrong tick.
                     //
                     // We deem this scenario (unlike the previous one) to be unlikely
                     // enough in practice.
                     let truncated = TruncatedTimestamp::from(directory_mtime);
                     let is_up_to_date = match dirstate_node.cached_directory_mtime()? {
                         Some(cached) => cached.likely_equal(truncated),
                         None => false,
                     };
                     if is_up_to_date {
                         return Ok(());
                     }
                     let hg_path = dirstate_node
                         .full_path_borrowed(self.dmap.on_disk)?
                         .detach_from_tree();
                     self.new_cachable_directories
                         .lock()
                         .unwrap()
                         .push((hg_path, truncated));
                     Ok(())
                 }
                 /// A file with `EntryState::Normal` in the dirstate was found in the
                 /// filesystem
                 fn handle_normal_file(
                     &self,
                     dirstate_node: &NodeRef<'tree, 'on_disk>,
                     fs_metadata: &std::fs::Metadata,
                 ) -> Result<(), DirstateV2ParseError> {
                     // Keep the low 31 bits
                     fn truncate_u64(value: u64) -> i32 {
                         (value & 0x7FFF_FFFF) as i32
                     }
                     let entry = dirstate_node
                         .entry()?
                         .expect("handle_normal_file called with entry-less node");
                     let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
                     let mode_changed =
                         || self.options.check_exec && entry.mode_changed(fs_metadata);
                     let size = entry.size();
                     let size_changed = size != truncate_u64(fs_metadata.len());
                     if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() {
                         // issue6456: Size returned may be longer due to encryption
                         // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
                         self.outcome
                             .lock()
                             .unwrap()
                             .unsure
                             .push(hg_path.detach_from_tree())
                     } else if dirstate_node.has_copy_source()
                         || entry.is_from_other_parent()
                         || (size >= 0 && (size_changed || mode_changed()))
                     {
                         self.outcome
                             .lock()
                             .unwrap()
                             .modified
                             .push(hg_path.detach_from_tree())
                     } else {
                         let mtime_looks_clean;
                         if let Some(dirstate_mtime) = entry.truncated_mtime() {
                             let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
-                                .expect("OS/libc does not support mtime?")
+                                .expect("OS/libc does not support mtime?");
-                                // For now don’t use sub-second precision for file mtimes
-                                .to_integer_second();
                             mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
                                 && !fs_mtime.likely_equal(self.options.last_normal_time)
                         } else {
                             // No mtime in the dirstate entry
                             mtime_looks_clean = false
                         };
                         if !mtime_looks_clean {
                             self.outcome
                                 .lock()
                                 .unwrap()
                                 .unsure
                                 .push(hg_path.detach_from_tree())
                         } else if self.options.list_clean {
                             self.outcome
                                 .lock()
                                 .unwrap()
                                 .clean
                                 .push(hg_path.detach_from_tree())
                         }
                     }
                     Ok(())
                 }
                 /// Visits a dirstate node, and recursively its children, for which
                 /// nothing exists on the filesystem.
                 fn traverse_dirstate_only(
                     &self,
                     dirstate_node: NodeRef<'tree, 'on_disk>,
                 ) -> Result<(), DirstateV2ParseError> {
                     self.check_for_outdated_directory_cache(&dirstate_node)?;
                     let on_disk = self.dmap.on_disk;
                     let node_path = dirstate_node.full_path_borrowed(on_disk)?;
                     let node_state = dirstate_node.state()?;
                     self.mark_removed_or_deleted_if_file(&node_path, node_state);
                     // Children are visited in parallel
                     dirstate_node
                         .children(on_disk)?
                         .par_iter()
                         .map(|child| self.traverse_dirstate_only(child))
                         .collect()
                 }
                 /// Reports a dirstate node that has no corresponding *file* on the
                 /// filesystem as removed (if its state is `Removed`) or deleted (any
                 /// other state), provided the matcher accepts its path.
                 ///
                 /// Does nothing on a "directory" node, that is when
                 /// `dirstate_node_state` is `None`.
                 fn mark_removed_or_deleted_if_file(
                     &self,
                     hg_path: &BorrowedPath<'tree, 'on_disk>,
                     dirstate_node_state: Option<EntryState>,
                 ) {
                     let state = match dirstate_node_state {
                         Some(state) => state,
                         None => return,
                     };
                     if !self.matcher.matches(hg_path) {
                         return;
                     }
                     let path = hg_path.detach_from_tree();
                     let mut outcome = self.outcome.lock().unwrap();
                     match state {
                         EntryState::Removed => outcome.removed.push(path),
                         _ => outcome.deleted.push(path),
                     }
                 }
                 /// Visits a filesystem entry that has no corresponding dirstate node
                 ///
                 /// Returns whether that path is ignored
                 fn traverse_fs_only(
                     &self,
                     has_ignored_ancestor: bool,
                     directory_hg_path: &HgPath,
                     fs_entry: &DirEntry,
                 ) -> bool {
                     let hg_path = directory_hg_path.join(&fs_entry.base_name);
                     let file_type = fs_entry.metadata.file_type();
                     if !file_type.is_dir() {
                         if !(file_type.is_file() || file_type.is_symlink()) {
                             // This is neither a directory, a plain file, or a symlink.
                             // Treat it like an ignored file.
                             return true;
                         }
                         if !self.matcher.matches(&hg_path) {
                             // We haven’t computed whether this path is ignored. It
                             // might not be, and a future run of status might have a
                             // different matcher that matches it. So treat it as not
                             // ignored. That is, inhibit readdir caching of the parent
                             // directory.
                             return false;
                         }
                         return self.mark_unknown_or_ignored(
                             has_ignored_ancestor,
                             &BorrowedPath::InMemory(&hg_path),
                         );
                     }
                     let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
                     // Descendants of an ignored directory are all ignored, while
                     // descendants of an unknown directory may be either unknown or
                     // ignored.
                     let traverse_children = self.options.list_ignored
                         || (!is_ignored && self.options.list_unknown);
                     if traverse_children {
                         let is_at_repo_root = false;
                         let listing = self.read_dir(
                             &hg_path,
                             &fs_entry.full_path,
                             is_at_repo_root,
                         );
                         if let Ok(children_fs_entries) = listing {
                             children_fs_entries.par_iter().for_each(|child_fs_entry| {
                                 self.traverse_fs_only(
                                     is_ignored,
                                     &hg_path,
                                     child_fs_entry,
                                 );
                             })
                         }
                     }
                     if self.options.collect_traversed_dirs {
                         self.outcome.lock().unwrap().traversed.push(hg_path.into())
                     }
                     is_ignored
                 }
                 /// Files `hg_path` under `ignored` or `unknown` in the outcome, if
                 /// the options ask for that category to be listed.
                 ///
                 /// Returns whether that path is ignored
                 fn mark_unknown_or_ignored(
                     &self,
                     has_ignored_ancestor: bool,
                     hg_path: &BorrowedPath<'_, 'on_disk>,
                 ) -> bool {
                     let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
                     let should_list = if is_ignored {
                         self.options.list_ignored
                     } else {
                         self.options.list_unknown
                     };
                     if should_list {
                         let path = hg_path.detach_from_tree();
                         let mut outcome = self.outcome.lock().unwrap();
                         if is_ignored {
                             outcome.ignored.push(path)
                         } else {
                             outcome.unknown.push(path)
                         }
                     }
                     is_ignored
                 }
             }
             /// One entry of a filesystem directory, as collected by
             /// `DirEntry::read_dir`.
             struct DirEntry {
                 /// File name of the entry within its directory, as bytes
                 base_name: HgPathBuf,
                 /// Path of the entry as returned by the directory iterator
                 full_path: PathBuf,
                 /// Metadata obtained from the directory entry
                 metadata: std::fs::Metadata,
             }
             impl DirEntry {
                 /// Collects the name and metadata of every entry in the `path`
                 /// directory. The result is **unsorted**.
                 ///
                 /// A `.hg` entry gets special treatment:
                 ///
                 /// * At the repository root, that entry is left out
                 /// * Elsewhere, if it is a directory, we’re listing the content of a
                 ///   sub-repo. Return an empty list instead.
                 fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result<Vec<Self>> {
                     let mut entries = Vec::new();
                     for raw_entry in path.read_dir()? {
                         let raw_entry = raw_entry?;
                         let metadata = raw_entry.metadata()?;
                         let base_name = get_bytes_from_os_string(raw_entry.file_name());
                         // FIXME don't do this when cached
                         let is_dot_hg = base_name == b".hg";
                         if is_dot_hg && is_at_repo_root {
                             // Skip the repo’s own .hg (might be a symlink)
                             continue;
                         }
                         if is_dot_hg && metadata.is_dir() {
                             // A .hg sub-directory at another location means a subrepo,
                             // skip it entirely.
                             return Ok(Vec::new());
                         }
                         entries.push(DirEntry {
                             base_name: base_name.into(),
                             full_path: raw_entry.path(),
                             metadata,
                         });
                     }
                     Ok(entries)
                 }
             }
             /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
             /// of the given repository.
             ///
             /// This is similar to `SystemTime::now()`, with the result truncated to the
             /// same time resolution as other files’ modification times. Using `.hg`
             /// instead of the system’s default temporary directory (such as `/tmp`) makes
             /// it more likely the temporary file is in the same disk partition as contents
             /// of the working directory, which can matter since different filesystems may
             /// store timestamps with different resolutions.
             ///
             /// This may fail, typically if we lack write permissions. In that case we
             /// should continue the `status()` algorithm anyway and consider the current
             /// date/time to be unknown.
             fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
                 let hg_dir = repo_root.join(".hg");
                 let probe_file = tempfile::tempfile_in(hg_dir)?;
                 let probe_metadata = probe_file.metadata()?;
                 probe_metadata.modified()
             }

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages