##// END OF EJS Templates
dirstate-v2: Move data file info in the docket closer together...
Simon Sapin -
r48977:d467e44f default
parent child Browse files
Show More
@@ -1,75 +1,75 b''
1 # dirstatedocket.py - docket file for dirstate-v2
1 # dirstatedocket.py - docket file for dirstate-v2
2 #
2 #
3 # Copyright Mercurial Contributors
3 # Copyright Mercurial Contributors
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11
11
12 from ..revlogutils import docket as docket_mod
12 from ..revlogutils import docket as docket_mod
13
13
14
14
15 V2_FORMAT_MARKER = b"dirstate-v2\n"
15 V2_FORMAT_MARKER = b"dirstate-v2\n"
16
16
17 # Must match the constant of the same name in
17 # Must match the constant of the same name in
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 TREE_METADATA_SIZE = 44
19 TREE_METADATA_SIZE = 44
20
20
21 # * 12 bytes: format marker
21 # * 12 bytes: format marker
22 # * 32 bytes: node ID of the working directory's first parent
22 # * 32 bytes: node ID of the working directory's first parent
23 # * 32 bytes: node ID of the working directory's second parent
23 # * 32 bytes: node ID of the working directory's second parent
24 # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
24 # * 4 bytes: big-endian used size of the data file
25 # * 4 bytes: big-endian used size of the data file
25 # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
26 # * 1 byte: length of the data file's UUID
26 # * 1 byte: length of the data file's UUID
27 # * variable: data file's UUID
27 # * variable: data file's UUID
28 #
28 #
29 # Node IDs are null-padded if shorter than 32 bytes.
29 # Node IDs are null-padded if shorter than 32 bytes.
30 # A data file shorter than the specified used size is corrupted (truncated)
30 # A data file shorter than the specified used size is corrupted (truncated)
31 HEADER = struct.Struct(
31 HEADER = struct.Struct(
32 ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
32 ">{}s32s32s{}sLB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
33 )
33 )
34
34
35
35
36 class DirstateDocket(object):
36 class DirstateDocket(object):
37 data_filename_pattern = b'dirstate.%s'
37 data_filename_pattern = b'dirstate.%s'
38
38
39 def __init__(self, parents, data_size, tree_metadata, uuid):
39 def __init__(self, parents, data_size, tree_metadata, uuid):
40 self.parents = parents
40 self.parents = parents
41 self.data_size = data_size
41 self.data_size = data_size
42 self.tree_metadata = tree_metadata
42 self.tree_metadata = tree_metadata
43 self.uuid = uuid
43 self.uuid = uuid
44
44
45 @classmethod
45 @classmethod
46 def with_new_uuid(cls, parents, data_size, tree_metadata):
46 def with_new_uuid(cls, parents, data_size, tree_metadata):
47 return cls(parents, data_size, tree_metadata, docket_mod.make_uid())
47 return cls(parents, data_size, tree_metadata, docket_mod.make_uid())
48
48
49 @classmethod
49 @classmethod
50 def parse(cls, data, nodeconstants):
50 def parse(cls, data, nodeconstants):
51 if not data:
51 if not data:
52 parents = (nodeconstants.nullid, nodeconstants.nullid)
52 parents = (nodeconstants.nullid, nodeconstants.nullid)
53 return cls(parents, 0, b'', None)
53 return cls(parents, 0, b'', None)
54 marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(data)
54 marker, p1, p2, meta, data_size, uuid_size = HEADER.unpack_from(data)
55 if marker != V2_FORMAT_MARKER:
55 if marker != V2_FORMAT_MARKER:
56 raise ValueError("expected dirstate-v2 marker")
56 raise ValueError("expected dirstate-v2 marker")
57 uuid = data[HEADER.size : HEADER.size + uuid_size]
57 uuid = data[HEADER.size : HEADER.size + uuid_size]
58 p1 = p1[: nodeconstants.nodelen]
58 p1 = p1[: nodeconstants.nodelen]
59 p2 = p2[: nodeconstants.nodelen]
59 p2 = p2[: nodeconstants.nodelen]
60 return cls((p1, p2), data_size, meta, uuid)
60 return cls((p1, p2), data_size, meta, uuid)
61
61
62 def serialize(self):
62 def serialize(self):
63 p1, p2 = self.parents
63 p1, p2 = self.parents
64 header = HEADER.pack(
64 header = HEADER.pack(
65 V2_FORMAT_MARKER,
65 V2_FORMAT_MARKER,
66 p1,
66 p1,
67 p2,
67 p2,
68 self.tree_metadata,
68 self.data_size,
69 self.data_size,
69 self.tree_metadata,
70 len(self.uuid),
70 len(self.uuid),
71 )
71 )
72 return header + self.uuid
72 return header + self.uuid
73
73
74 def data_filename(self):
74 def data_filename(self):
75 return self.data_filename_pattern % self.uuid
75 return self.data_filename_pattern % self.uuid
@@ -1,808 +1,808 b''
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
7 //! struct, followed by the data file identifier.
7 //! struct, followed by the data file identifier.
8 //!
8 //!
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
10 //! file may be longer than the size given in the docket, but not shorter. Only
10 //! file may be longer than the size given in the docket, but not shorter. Only
11 //! the start of the data file up to the given size is considered. The
11 //! the start of the data file up to the given size is considered. The
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
13 //! `Root` struct is found at the end of that slice of data.
13 //! `Root` struct is found at the end of that slice of data.
14 //!
14 //!
15 //! Its `root_nodes` field contains the slice (offset and length) to
15 //! Its `root_nodes` field contains the slice (offset and length) to
16 //! the nodes representing the files and directories at the root of the
16 //! the nodes representing the files and directories at the root of the
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
19 //! nodes (if any) for nested files and directories.
19 //! nodes (if any) for nested files and directories.
20
20
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
22 use crate::dirstate_tree::path_with_basename::WithBasename;
22 use crate::dirstate_tree::path_with_basename::WithBasename;
23 use crate::errors::HgError;
23 use crate::errors::HgError;
24 use crate::utils::hg_path::HgPath;
24 use crate::utils::hg_path::HgPath;
25 use crate::DirstateEntry;
25 use crate::DirstateEntry;
26 use crate::DirstateError;
26 use crate::DirstateError;
27 use crate::DirstateParents;
27 use crate::DirstateParents;
28 use bitflags::bitflags;
28 use bitflags::bitflags;
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 use bytes_cast::BytesCast;
30 use bytes_cast::BytesCast;
31 use format_bytes::format_bytes;
31 use format_bytes::format_bytes;
32 use std::borrow::Cow;
32 use std::borrow::Cow;
33 use std::convert::{TryFrom, TryInto};
33 use std::convert::{TryFrom, TryInto};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
35
35
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
37 /// This is a redundant sanity check more than an actual "magic number" since
37 /// This is a redundant sanity check more than an actual "magic number" since
38 /// `.hg/requires` already governs which format should be used.
38 /// `.hg/requires` already governs which format should be used.
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
40
40
41 /// Keep space for 256-bit hashes
41 /// Keep space for 256-bit hashes
42 const STORED_NODE_ID_BYTES: usize = 32;
42 const STORED_NODE_ID_BYTES: usize = 32;
43
43
44 /// … even though only 160 bits are used for now, with SHA-1
44 /// … even though only 160 bits are used for now, with SHA-1
45 const USED_NODE_ID_BYTES: usize = 20;
45 const USED_NODE_ID_BYTES: usize = 20;
46
46
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49
49
50 /// Must match the constant of the same name in
50 /// Must match the constant of the same name in
51 /// `mercurial/dirstateutils/docket.py`
51 /// `mercurial/dirstateutils/docket.py`
52 const TREE_METADATA_SIZE: usize = 44;
52 const TREE_METADATA_SIZE: usize = 44;
53
53
54 /// Make sure that size-affecting changes are made knowingly
54 /// Make sure that size-affecting changes are made knowingly
55 #[allow(unused)]
55 #[allow(unused)]
56 fn static_assert_size_of() {
56 fn static_assert_size_of() {
57 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
57 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
58 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
58 let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
59 let _ = std::mem::transmute::<Node, [u8; 43]>;
59 let _ = std::mem::transmute::<Node, [u8; 43]>;
60 }
60 }
61
61
62 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
62 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
63 #[derive(BytesCast)]
63 #[derive(BytesCast)]
64 #[repr(C)]
64 #[repr(C)]
65 struct DocketHeader {
65 struct DocketHeader {
66 marker: [u8; V2_FORMAT_MARKER.len()],
66 marker: [u8; V2_FORMAT_MARKER.len()],
67 parent_1: [u8; STORED_NODE_ID_BYTES],
67 parent_1: [u8; STORED_NODE_ID_BYTES],
68 parent_2: [u8; STORED_NODE_ID_BYTES],
68 parent_2: [u8; STORED_NODE_ID_BYTES],
69
69
70 metadata: TreeMetadata,
71
70 /// Counted in bytes
72 /// Counted in bytes
71 data_size: Size,
73 data_size: Size,
72
74
73 metadata: TreeMetadata,
74
75 uuid_size: u8,
75 uuid_size: u8,
76 }
76 }
77
77
78 pub struct Docket<'on_disk> {
78 pub struct Docket<'on_disk> {
79 header: &'on_disk DocketHeader,
79 header: &'on_disk DocketHeader,
80 uuid: &'on_disk [u8],
80 uuid: &'on_disk [u8],
81 }
81 }
82
82
83 #[derive(BytesCast)]
83 #[derive(BytesCast)]
84 #[repr(C)]
84 #[repr(C)]
85 struct TreeMetadata {
85 struct TreeMetadata {
86 root_nodes: ChildNodes,
86 root_nodes: ChildNodes,
87 nodes_with_entry_count: Size,
87 nodes_with_entry_count: Size,
88 nodes_with_copy_source_count: Size,
88 nodes_with_copy_source_count: Size,
89
89
90 /// How many bytes of this data file are not used anymore
90 /// How many bytes of this data file are not used anymore
91 unreachable_bytes: Size,
91 unreachable_bytes: Size,
92
92
93 /// Current version always sets these bytes to zero when creating or
93 /// Current version always sets these bytes to zero when creating or
94 /// updating a dirstate. Future versions could assign some bits to signal
94 /// updating a dirstate. Future versions could assign some bits to signal
95 /// for example "the version that last wrote/updated this dirstate did so
95 /// for example "the version that last wrote/updated this dirstate did so
96 /// in such and such way that can be relied on by versions that know to."
96 /// in such and such way that can be relied on by versions that know to."
97 unused: [u8; 4],
97 unused: [u8; 4],
98
98
99 /// If non-zero, a hash of ignore files that were used for some previous
99 /// If non-zero, a hash of ignore files that were used for some previous
100 /// run of the `status` algorithm.
100 /// run of the `status` algorithm.
101 ///
101 ///
102 /// We define:
102 /// We define:
103 ///
103 ///
104 /// * "Root" ignore files are `.hgignore` at the root of the repository if
104 /// * "Root" ignore files are `.hgignore` at the root of the repository if
105 /// it exists, and files from `ui.ignore.*` config. This set of files is
105 /// it exists, and files from `ui.ignore.*` config. This set of files is
106 /// then sorted by the string representation of their path.
106 /// then sorted by the string representation of their path.
107 /// * The "expanded contents" of an ignore file is the byte string made
107 /// * The "expanded contents" of an ignore file is the byte string made
108 /// by concatenating its contents with the "expanded contents" of other
108 /// by concatenating its contents with the "expanded contents" of other
109 /// files included with `include:` or `subinclude:` files, in inclusion
109 /// files included with `include:` or `subinclude:` files, in inclusion
110 /// order. This definition is recursive, as included files can
110 /// order. This definition is recursive, as included files can
111 /// themselves include more files.
111 /// themselves include more files.
112 ///
112 ///
113 /// This hash is defined as the SHA-1 of the concatenation (in sorted
113 /// This hash is defined as the SHA-1 of the concatenation (in sorted
114 /// order) of the "expanded contents" of each "root" ignore file.
114 /// order) of the "expanded contents" of each "root" ignore file.
115 /// (Note that computing this does not require actually concatenating byte
115 /// (Note that computing this does not require actually concatenating byte
116 /// strings into contiguous memory, instead SHA-1 hashing can be done
116 /// strings into contiguous memory, instead SHA-1 hashing can be done
117 /// incrementally.)
117 /// incrementally.)
118 ignore_patterns_hash: IgnorePatternsHash,
118 ignore_patterns_hash: IgnorePatternsHash,
119 }
119 }
120
120
121 #[derive(BytesCast)]
121 #[derive(BytesCast)]
122 #[repr(C)]
122 #[repr(C)]
123 pub(super) struct Node {
123 pub(super) struct Node {
124 full_path: PathSlice,
124 full_path: PathSlice,
125
125
126 /// In bytes from `self.full_path.start`
126 /// In bytes from `self.full_path.start`
127 base_name_start: PathSize,
127 base_name_start: PathSize,
128
128
129 copy_source: OptPathSlice,
129 copy_source: OptPathSlice,
130 children: ChildNodes,
130 children: ChildNodes,
131 pub(super) descendants_with_entry_count: Size,
131 pub(super) descendants_with_entry_count: Size,
132 pub(super) tracked_descendants_count: Size,
132 pub(super) tracked_descendants_count: Size,
133
133
134 /// Depending on the bits in `flags`:
134 /// Depending on the bits in `flags`:
135 ///
135 ///
136 /// * If any of `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` are set, the
136 /// * If any of `WDIR_TRACKED`, `P1_TRACKED`, or `P2_INFO` are set, the
137 /// node has an entry.
137 /// node has an entry.
138 ///
138 ///
139 /// - If `HAS_MODE_AND_SIZE` is set, `data.mode` and `data.size` are
139 /// - If `HAS_MODE_AND_SIZE` is set, `data.mode` and `data.size` are
140 /// meaningful. Otherwise they are set to zero
140 /// meaningful. Otherwise they are set to zero
141 /// - If `HAS_MTIME` is set, `data.mtime` is meaningful. Otherwise it is
141 /// - If `HAS_MTIME` is set, `data.mtime` is meaningful. Otherwise it is
142 /// set to zero.
142 /// set to zero.
143 ///
143 ///
144 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO`, or `HAS_MTIME`
144 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO`, or `HAS_MTIME`
145 /// are set, the node does not have an entry and `data` is set to all
145 /// are set, the node does not have an entry and `data` is set to all
146 /// zeros.
146 /// zeros.
147 ///
147 ///
148 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO` are set, but
148 /// * If none of `WDIR_TRACKED`, `P1_TRACKED`, `P2_INFO` are set, but
149 /// `HAS_MTIME` is set, the bytes of `data` should instead be
149 /// `HAS_MTIME` is set, the bytes of `data` should instead be
150 /// interpreted as the `Timestamp` for the mtime of a cached directory.
150 /// interpreted as the `Timestamp` for the mtime of a cached directory.
151 ///
151 ///
152 /// The presence of this combination of flags means that at some point,
152 /// The presence of this combination of flags means that at some point,
153 /// this path in the working directory was observed:
153 /// this path in the working directory was observed:
154 ///
154 ///
155 /// - To be a directory
155 /// - To be a directory
156 /// - With the modification time as given by `Timestamp`
156 /// - With the modification time as given by `Timestamp`
157 /// - That timestamp was already strictly in the past when observed,
157 /// - That timestamp was already strictly in the past when observed,
158 /// meaning that later changes cannot happen in the same clock tick
158 /// meaning that later changes cannot happen in the same clock tick
159 /// and must cause a different modification time (unless the system
159 /// and must cause a different modification time (unless the system
160 /// clock jumps back and we get unlucky, which is not impossible but
160 /// clock jumps back and we get unlucky, which is not impossible but
161 /// deemed unlikely enough).
161 /// deemed unlikely enough).
162 /// - All direct children of this directory (as returned by
162 /// - All direct children of this directory (as returned by
163 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
163 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
164 /// are ignored by ignore patterns whose hash is in
164 /// are ignored by ignore patterns whose hash is in
165 /// `TreeMetadata::ignore_patterns_hash`.
165 /// `TreeMetadata::ignore_patterns_hash`.
166 ///
166 ///
167 /// This means that if `std::fs::symlink_metadata` later reports the
167 /// This means that if `std::fs::symlink_metadata` later reports the
168 /// same modification time and ignored patterns haven’t changed, a run
168 /// same modification time and ignored patterns haven’t changed, a run
169 /// of status that is not listing ignored files can skip calling
169 /// of status that is not listing ignored files can skip calling
170 /// `std::fs::read_dir` again for this directory, iterate child
170 /// `std::fs::read_dir` again for this directory, iterate child
171 /// dirstate nodes instead.
171 /// dirstate nodes instead.
172 flags: Flags,
172 flags: Flags,
173 data: Entry,
173 data: Entry,
174 }
174 }
175
175
176 bitflags! {
176 bitflags! {
177 #[derive(BytesCast)]
177 #[derive(BytesCast)]
178 #[repr(C)]
178 #[repr(C)]
179 struct Flags: u8 {
179 struct Flags: u8 {
180 const WDIR_TRACKED = 1 << 0;
180 const WDIR_TRACKED = 1 << 0;
181 const P1_TRACKED = 1 << 1;
181 const P1_TRACKED = 1 << 1;
182 const P2_INFO = 1 << 2;
182 const P2_INFO = 1 << 2;
183 const HAS_MODE_AND_SIZE = 1 << 3;
183 const HAS_MODE_AND_SIZE = 1 << 3;
184 const HAS_MTIME = 1 << 4;
184 const HAS_MTIME = 1 << 4;
185 }
185 }
186 }
186 }
187
187
188 #[derive(BytesCast, Copy, Clone, Debug)]
188 #[derive(BytesCast, Copy, Clone, Debug)]
189 #[repr(C)]
189 #[repr(C)]
190 struct Entry {
190 struct Entry {
191 mode: I32Be,
191 mode: I32Be,
192 mtime: I32Be,
192 mtime: I32Be,
193 size: I32Be,
193 size: I32Be,
194 }
194 }
195
195
196 /// Duration since the Unix epoch
196 /// Duration since the Unix epoch
197 #[derive(BytesCast, Copy, Clone, PartialEq)]
197 #[derive(BytesCast, Copy, Clone, PartialEq)]
198 #[repr(C)]
198 #[repr(C)]
199 pub(super) struct Timestamp {
199 pub(super) struct Timestamp {
200 seconds: I64Be,
200 seconds: I64Be,
201
201
202 /// In `0 .. 1_000_000_000`.
202 /// In `0 .. 1_000_000_000`.
203 ///
203 ///
204 /// This timestamp is later or earlier than `(seconds, 0)` by this many
204 /// This timestamp is later or earlier than `(seconds, 0)` by this many
205 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
205 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
206 nanoseconds: U32Be,
206 nanoseconds: U32Be,
207 }
207 }
208
208
209 /// Counted in bytes from the start of the file
209 /// Counted in bytes from the start of the file
210 ///
210 ///
211 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
211 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
212 type Offset = U32Be;
212 type Offset = U32Be;
213
213
214 /// Counted in number of items
214 /// Counted in number of items
215 ///
215 ///
216 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
216 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
217 type Size = U32Be;
217 type Size = U32Be;
218
218
219 /// Counted in bytes
219 /// Counted in bytes
220 ///
220 ///
221 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
221 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
222 type PathSize = U16Be;
222 type PathSize = U16Be;
223
223
224 /// A contiguous sequence of `len` times `Node`, representing the child nodes
224 /// A contiguous sequence of `len` times `Node`, representing the child nodes
225 /// of either some other node or of the repository root.
225 /// of either some other node or of the repository root.
226 ///
226 ///
227 /// Always sorted by ascending `full_path`, to allow binary search.
227 /// Always sorted by ascending `full_path`, to allow binary search.
228 /// Since nodes with the same parent nodes also have the same parent path,
228 /// Since nodes with the same parent nodes also have the same parent path,
229 /// only the `base_name`s need to be compared during binary search.
229 /// only the `base_name`s need to be compared during binary search.
230 #[derive(BytesCast, Copy, Clone)]
230 #[derive(BytesCast, Copy, Clone)]
231 #[repr(C)]
231 #[repr(C)]
232 struct ChildNodes {
232 struct ChildNodes {
233 start: Offset,
233 start: Offset,
234 len: Size,
234 len: Size,
235 }
235 }
236
236
237 /// A `HgPath` of `len` bytes
237 /// A `HgPath` of `len` bytes
238 #[derive(BytesCast, Copy, Clone)]
238 #[derive(BytesCast, Copy, Clone)]
239 #[repr(C)]
239 #[repr(C)]
240 struct PathSlice {
240 struct PathSlice {
241 start: Offset,
241 start: Offset,
242 len: PathSize,
242 len: PathSize,
243 }
243 }
244
244
245 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
245 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
246 type OptPathSlice = PathSlice;
246 type OptPathSlice = PathSlice;
247
247
248 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
248 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
249 ///
249 ///
250 /// This should only happen if Mercurial is buggy or a repository is corrupted.
250 /// This should only happen if Mercurial is buggy or a repository is corrupted.
251 #[derive(Debug)]
251 #[derive(Debug)]
252 pub struct DirstateV2ParseError;
252 pub struct DirstateV2ParseError;
253
253
254 impl From<DirstateV2ParseError> for HgError {
254 impl From<DirstateV2ParseError> for HgError {
255 fn from(_: DirstateV2ParseError) -> Self {
255 fn from(_: DirstateV2ParseError) -> Self {
256 HgError::corrupted("dirstate-v2 parse error")
256 HgError::corrupted("dirstate-v2 parse error")
257 }
257 }
258 }
258 }
259
259
260 impl From<DirstateV2ParseError> for crate::DirstateError {
260 impl From<DirstateV2ParseError> for crate::DirstateError {
261 fn from(error: DirstateV2ParseError) -> Self {
261 fn from(error: DirstateV2ParseError) -> Self {
262 HgError::from(error).into()
262 HgError::from(error).into()
263 }
263 }
264 }
264 }
265
265
266 impl<'on_disk> Docket<'on_disk> {
266 impl<'on_disk> Docket<'on_disk> {
267 pub fn parents(&self) -> DirstateParents {
267 pub fn parents(&self) -> DirstateParents {
268 use crate::Node;
268 use crate::Node;
269 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
269 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
270 .unwrap()
270 .unwrap()
271 .clone();
271 .clone();
272 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
272 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
273 .unwrap()
273 .unwrap()
274 .clone();
274 .clone();
275 DirstateParents { p1, p2 }
275 DirstateParents { p1, p2 }
276 }
276 }
277
277
278 pub fn tree_metadata(&self) -> &[u8] {
278 pub fn tree_metadata(&self) -> &[u8] {
279 self.header.metadata.as_bytes()
279 self.header.metadata.as_bytes()
280 }
280 }
281
281
282 pub fn data_size(&self) -> usize {
282 pub fn data_size(&self) -> usize {
283 // This `unwrap` could only panic on a 16-bit CPU
283 // This `unwrap` could only panic on a 16-bit CPU
284 self.header.data_size.get().try_into().unwrap()
284 self.header.data_size.get().try_into().unwrap()
285 }
285 }
286
286
287 pub fn data_filename(&self) -> String {
287 pub fn data_filename(&self) -> String {
288 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
288 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
289 }
289 }
290 }
290 }
291
291
292 pub fn read_docket(
292 pub fn read_docket(
293 on_disk: &[u8],
293 on_disk: &[u8],
294 ) -> Result<Docket<'_>, DirstateV2ParseError> {
294 ) -> Result<Docket<'_>, DirstateV2ParseError> {
295 let (header, uuid) =
295 let (header, uuid) =
296 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
296 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
297 let uuid_size = header.uuid_size as usize;
297 let uuid_size = header.uuid_size as usize;
298 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
298 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
299 Ok(Docket { header, uuid })
299 Ok(Docket { header, uuid })
300 } else {
300 } else {
301 Err(DirstateV2ParseError)
301 Err(DirstateV2ParseError)
302 }
302 }
303 }
303 }
304
304
305 pub(super) fn read<'on_disk>(
305 pub(super) fn read<'on_disk>(
306 on_disk: &'on_disk [u8],
306 on_disk: &'on_disk [u8],
307 metadata: &[u8],
307 metadata: &[u8],
308 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
308 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
309 if on_disk.is_empty() {
309 if on_disk.is_empty() {
310 return Ok(DirstateMap::empty(on_disk));
310 return Ok(DirstateMap::empty(on_disk));
311 }
311 }
312 let (meta, _) = TreeMetadata::from_bytes(metadata)
312 let (meta, _) = TreeMetadata::from_bytes(metadata)
313 .map_err(|_| DirstateV2ParseError)?;
313 .map_err(|_| DirstateV2ParseError)?;
314 let dirstate_map = DirstateMap {
314 let dirstate_map = DirstateMap {
315 on_disk,
315 on_disk,
316 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
316 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
317 on_disk,
317 on_disk,
318 meta.root_nodes,
318 meta.root_nodes,
319 )?),
319 )?),
320 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
320 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
321 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
321 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
322 ignore_patterns_hash: meta.ignore_patterns_hash,
322 ignore_patterns_hash: meta.ignore_patterns_hash,
323 unreachable_bytes: meta.unreachable_bytes.get(),
323 unreachable_bytes: meta.unreachable_bytes.get(),
324 };
324 };
325 Ok(dirstate_map)
325 Ok(dirstate_map)
326 }
326 }
327
327
328 impl Node {
328 impl Node {
329 pub(super) fn full_path<'on_disk>(
329 pub(super) fn full_path<'on_disk>(
330 &self,
330 &self,
331 on_disk: &'on_disk [u8],
331 on_disk: &'on_disk [u8],
332 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
332 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
333 read_hg_path(on_disk, self.full_path)
333 read_hg_path(on_disk, self.full_path)
334 }
334 }
335
335
336 pub(super) fn base_name_start<'on_disk>(
336 pub(super) fn base_name_start<'on_disk>(
337 &self,
337 &self,
338 ) -> Result<usize, DirstateV2ParseError> {
338 ) -> Result<usize, DirstateV2ParseError> {
339 let start = self.base_name_start.get();
339 let start = self.base_name_start.get();
340 if start < self.full_path.len.get() {
340 if start < self.full_path.len.get() {
341 let start = usize::try_from(start)
341 let start = usize::try_from(start)
342 // u16 -> usize (`PathSize` is `U16Be`), this conversion cannot fail
342 // u16 -> usize (`PathSize` is `U16Be`), this conversion cannot fail
343 .expect("dirstate-v2 base_name_start out of bounds");
343 .expect("dirstate-v2 base_name_start out of bounds");
344 Ok(start)
344 Ok(start)
345 } else {
345 } else {
346 Err(DirstateV2ParseError)
346 Err(DirstateV2ParseError)
347 }
347 }
348 }
348 }
349
349
350 pub(super) fn base_name<'on_disk>(
350 pub(super) fn base_name<'on_disk>(
351 &self,
351 &self,
352 on_disk: &'on_disk [u8],
352 on_disk: &'on_disk [u8],
353 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
353 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
354 let full_path = self.full_path(on_disk)?;
354 let full_path = self.full_path(on_disk)?;
355 let base_name_start = self.base_name_start()?;
355 let base_name_start = self.base_name_start()?;
356 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
356 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
357 }
357 }
358
358
359 pub(super) fn path<'on_disk>(
359 pub(super) fn path<'on_disk>(
360 &self,
360 &self,
361 on_disk: &'on_disk [u8],
361 on_disk: &'on_disk [u8],
362 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
362 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
363 Ok(WithBasename::from_raw_parts(
363 Ok(WithBasename::from_raw_parts(
364 Cow::Borrowed(self.full_path(on_disk)?),
364 Cow::Borrowed(self.full_path(on_disk)?),
365 self.base_name_start()?,
365 self.base_name_start()?,
366 ))
366 ))
367 }
367 }
368
368
369 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
369 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
370 self.copy_source.start.get() != 0
370 self.copy_source.start.get() != 0
371 }
371 }
372
372
373 pub(super) fn copy_source<'on_disk>(
373 pub(super) fn copy_source<'on_disk>(
374 &self,
374 &self,
375 on_disk: &'on_disk [u8],
375 on_disk: &'on_disk [u8],
376 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
376 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
377 Ok(if self.has_copy_source() {
377 Ok(if self.has_copy_source() {
378 Some(read_hg_path(on_disk, self.copy_source)?)
378 Some(read_hg_path(on_disk, self.copy_source)?)
379 } else {
379 } else {
380 None
380 None
381 })
381 })
382 }
382 }
383
383
384 fn has_entry(&self) -> bool {
384 fn has_entry(&self) -> bool {
385 self.flags.intersects(
385 self.flags.intersects(
386 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
386 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
387 )
387 )
388 }
388 }
389
389
390 pub(super) fn node_data(
390 pub(super) fn node_data(
391 &self,
391 &self,
392 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
392 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
393 if self.has_entry() {
393 if self.has_entry() {
394 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
394 Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
395 } else if let Some(&mtime) = self.cached_directory_mtime() {
395 } else if let Some(&mtime) = self.cached_directory_mtime() {
396 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
396 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
397 } else {
397 } else {
398 Ok(dirstate_map::NodeData::None)
398 Ok(dirstate_map::NodeData::None)
399 }
399 }
400 }
400 }
401
401
402 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
402 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
403 if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
403 if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
404 Some(self.data.as_timestamp())
404 Some(self.data.as_timestamp())
405 } else {
405 } else {
406 None
406 None
407 }
407 }
408 }
408 }
409
409
410 fn assume_entry(&self) -> DirstateEntry {
410 fn assume_entry(&self) -> DirstateEntry {
411 // TODO: convert through raw bits instead?
411 // TODO: convert through raw bits instead?
412 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
412 let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
413 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
413 let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
414 let p2_info = self.flags.contains(Flags::P2_INFO);
414 let p2_info = self.flags.contains(Flags::P2_INFO);
415 let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
415 let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
416 Some((self.data.mode.into(), self.data.size.into()))
416 Some((self.data.mode.into(), self.data.size.into()))
417 } else {
417 } else {
418 None
418 None
419 };
419 };
420 let mtime = if self.flags.contains(Flags::HAS_MTIME) {
420 let mtime = if self.flags.contains(Flags::HAS_MTIME) {
421 Some(self.data.mtime.into())
421 Some(self.data.mtime.into())
422 } else {
422 } else {
423 None
423 None
424 };
424 };
425 DirstateEntry::from_v2_data(
425 DirstateEntry::from_v2_data(
426 wdir_tracked,
426 wdir_tracked,
427 p1_tracked,
427 p1_tracked,
428 p2_info,
428 p2_info,
429 mode_size,
429 mode_size,
430 mtime,
430 mtime,
431 )
431 )
432 }
432 }
433
433
434 pub(super) fn entry(
434 pub(super) fn entry(
435 &self,
435 &self,
436 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
436 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
437 if self.has_entry() {
437 if self.has_entry() {
438 Ok(Some(self.assume_entry()))
438 Ok(Some(self.assume_entry()))
439 } else {
439 } else {
440 Ok(None)
440 Ok(None)
441 }
441 }
442 }
442 }
443
443
444 pub(super) fn children<'on_disk>(
444 pub(super) fn children<'on_disk>(
445 &self,
445 &self,
446 on_disk: &'on_disk [u8],
446 on_disk: &'on_disk [u8],
447 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
447 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
448 read_nodes(on_disk, self.children)
448 read_nodes(on_disk, self.children)
449 }
449 }
450
450
451 pub(super) fn to_in_memory_node<'on_disk>(
451 pub(super) fn to_in_memory_node<'on_disk>(
452 &self,
452 &self,
453 on_disk: &'on_disk [u8],
453 on_disk: &'on_disk [u8],
454 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
454 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
455 Ok(dirstate_map::Node {
455 Ok(dirstate_map::Node {
456 children: dirstate_map::ChildNodes::OnDisk(
456 children: dirstate_map::ChildNodes::OnDisk(
457 self.children(on_disk)?,
457 self.children(on_disk)?,
458 ),
458 ),
459 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
459 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
460 data: self.node_data()?,
460 data: self.node_data()?,
461 descendants_with_entry_count: self
461 descendants_with_entry_count: self
462 .descendants_with_entry_count
462 .descendants_with_entry_count
463 .get(),
463 .get(),
464 tracked_descendants_count: self.tracked_descendants_count.get(),
464 tracked_descendants_count: self.tracked_descendants_count.get(),
465 })
465 })
466 }
466 }
467 }
467 }
468
468
469 impl Entry {
469 impl Entry {
470 fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
470 fn from_dirstate_entry(entry: &DirstateEntry) -> (Flags, Self) {
471 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
471 let (wdir_tracked, p1_tracked, p2_info, mode_size_opt, mtime_opt) =
472 entry.v2_data();
472 entry.v2_data();
473 // TODO: convert throug raw flag bits instead?
473 // TODO: convert throug raw flag bits instead?
474 let mut flags = Flags::empty();
474 let mut flags = Flags::empty();
475 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
475 flags.set(Flags::WDIR_TRACKED, wdir_tracked);
476 flags.set(Flags::P1_TRACKED, p1_tracked);
476 flags.set(Flags::P1_TRACKED, p1_tracked);
477 flags.set(Flags::P2_INFO, p2_info);
477 flags.set(Flags::P2_INFO, p2_info);
478 let (mode, size, mtime);
478 let (mode, size, mtime);
479 if let Some((m, s)) = mode_size_opt {
479 if let Some((m, s)) = mode_size_opt {
480 mode = m;
480 mode = m;
481 size = s;
481 size = s;
482 flags.insert(Flags::HAS_MODE_AND_SIZE)
482 flags.insert(Flags::HAS_MODE_AND_SIZE)
483 } else {
483 } else {
484 mode = 0;
484 mode = 0;
485 size = 0;
485 size = 0;
486 }
486 }
487 if let Some(m) = mtime_opt {
487 if let Some(m) = mtime_opt {
488 mtime = m;
488 mtime = m;
489 flags.insert(Flags::HAS_MTIME);
489 flags.insert(Flags::HAS_MTIME);
490 } else {
490 } else {
491 mtime = 0;
491 mtime = 0;
492 }
492 }
493 let raw_entry = Entry {
493 let raw_entry = Entry {
494 mode: mode.into(),
494 mode: mode.into(),
495 size: size.into(),
495 size: size.into(),
496 mtime: mtime.into(),
496 mtime: mtime.into(),
497 };
497 };
498 (flags, raw_entry)
498 (flags, raw_entry)
499 }
499 }
500
500
501 fn from_timestamp(timestamp: Timestamp) -> Self {
501 fn from_timestamp(timestamp: Timestamp) -> Self {
502 // Safety: both types implement the `ByteCast` trait, so we could
502 // Safety: both types implement the `ByteCast` trait, so we could
503 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
503 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
504 // `transmute` instead makes the compiler check that the two types
504 // `transmute` instead makes the compiler check that the two types
505 // have the same size, which eliminates the error case of
505 // have the same size, which eliminates the error case of
506 // `from_bytes`.
506 // `from_bytes`.
507 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
507 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
508 }
508 }
509
509
510 fn as_timestamp(&self) -> &Timestamp {
510 fn as_timestamp(&self) -> &Timestamp {
511 // Safety: same as above in `from_timestamp`
511 // Safety: same as above in `from_timestamp`
512 unsafe { &*(self as *const Entry as *const Timestamp) }
512 unsafe { &*(self as *const Entry as *const Timestamp) }
513 }
513 }
514 }
514 }
515
515
516 impl Timestamp {
516 impl Timestamp {
517 pub fn seconds(&self) -> i64 {
517 pub fn seconds(&self) -> i64 {
518 self.seconds.get()
518 self.seconds.get()
519 }
519 }
520 }
520 }
521
521
522 impl From<SystemTime> for Timestamp {
522 impl From<SystemTime> for Timestamp {
523 fn from(system_time: SystemTime) -> Self {
523 fn from(system_time: SystemTime) -> Self {
524 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
524 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
525 Ok(duration) => {
525 Ok(duration) => {
526 (duration.as_secs() as i64, duration.subsec_nanos())
526 (duration.as_secs() as i64, duration.subsec_nanos())
527 }
527 }
528 Err(error) => {
528 Err(error) => {
529 let negative = error.duration();
529 let negative = error.duration();
530 (-(negative.as_secs() as i64), negative.subsec_nanos())
530 (-(negative.as_secs() as i64), negative.subsec_nanos())
531 }
531 }
532 };
532 };
533 Timestamp {
533 Timestamp {
534 seconds: secs.into(),
534 seconds: secs.into(),
535 nanoseconds: nanos.into(),
535 nanoseconds: nanos.into(),
536 }
536 }
537 }
537 }
538 }
538 }
539
539
540 impl From<&'_ Timestamp> for SystemTime {
540 impl From<&'_ Timestamp> for SystemTime {
541 fn from(timestamp: &'_ Timestamp) -> Self {
541 fn from(timestamp: &'_ Timestamp) -> Self {
542 let secs = timestamp.seconds.get();
542 let secs = timestamp.seconds.get();
543 let nanos = timestamp.nanoseconds.get();
543 let nanos = timestamp.nanoseconds.get();
544 if secs >= 0 {
544 if secs >= 0 {
545 UNIX_EPOCH + Duration::new(secs as u64, nanos)
545 UNIX_EPOCH + Duration::new(secs as u64, nanos)
546 } else {
546 } else {
547 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
547 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
548 }
548 }
549 }
549 }
550 }
550 }
551
551
552 fn read_hg_path(
552 fn read_hg_path(
553 on_disk: &[u8],
553 on_disk: &[u8],
554 slice: PathSlice,
554 slice: PathSlice,
555 ) -> Result<&HgPath, DirstateV2ParseError> {
555 ) -> Result<&HgPath, DirstateV2ParseError> {
556 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
556 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
557 }
557 }
558
558
559 fn read_nodes(
559 fn read_nodes(
560 on_disk: &[u8],
560 on_disk: &[u8],
561 slice: ChildNodes,
561 slice: ChildNodes,
562 ) -> Result<&[Node], DirstateV2ParseError> {
562 ) -> Result<&[Node], DirstateV2ParseError> {
563 read_slice(on_disk, slice.start, slice.len.get())
563 read_slice(on_disk, slice.start, slice.len.get())
564 }
564 }
565
565
566 fn read_slice<T, Len>(
566 fn read_slice<T, Len>(
567 on_disk: &[u8],
567 on_disk: &[u8],
568 start: Offset,
568 start: Offset,
569 len: Len,
569 len: Len,
570 ) -> Result<&[T], DirstateV2ParseError>
570 ) -> Result<&[T], DirstateV2ParseError>
571 where
571 where
572 T: BytesCast,
572 T: BytesCast,
573 Len: TryInto<usize>,
573 Len: TryInto<usize>,
574 {
574 {
575 // Either `usize::MAX` would result in "out of bounds" error since a single
575 // Either `usize::MAX` would result in "out of bounds" error since a single
576 // `&[u8]` cannot occupy the entire addess space.
576 // `&[u8]` cannot occupy the entire addess space.
577 let start = start.get().try_into().unwrap_or(std::usize::MAX);
577 let start = start.get().try_into().unwrap_or(std::usize::MAX);
578 let len = len.try_into().unwrap_or(std::usize::MAX);
578 let len = len.try_into().unwrap_or(std::usize::MAX);
579 on_disk
579 on_disk
580 .get(start..)
580 .get(start..)
581 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
581 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
582 .map(|(slice, _rest)| slice)
582 .map(|(slice, _rest)| slice)
583 .ok_or_else(|| DirstateV2ParseError)
583 .ok_or_else(|| DirstateV2ParseError)
584 }
584 }
585
585
586 pub(crate) fn for_each_tracked_path<'on_disk>(
586 pub(crate) fn for_each_tracked_path<'on_disk>(
587 on_disk: &'on_disk [u8],
587 on_disk: &'on_disk [u8],
588 metadata: &[u8],
588 metadata: &[u8],
589 mut f: impl FnMut(&'on_disk HgPath),
589 mut f: impl FnMut(&'on_disk HgPath),
590 ) -> Result<(), DirstateV2ParseError> {
590 ) -> Result<(), DirstateV2ParseError> {
591 let (meta, _) = TreeMetadata::from_bytes(metadata)
591 let (meta, _) = TreeMetadata::from_bytes(metadata)
592 .map_err(|_| DirstateV2ParseError)?;
592 .map_err(|_| DirstateV2ParseError)?;
593 fn recur<'on_disk>(
593 fn recur<'on_disk>(
594 on_disk: &'on_disk [u8],
594 on_disk: &'on_disk [u8],
595 nodes: ChildNodes,
595 nodes: ChildNodes,
596 f: &mut impl FnMut(&'on_disk HgPath),
596 f: &mut impl FnMut(&'on_disk HgPath),
597 ) -> Result<(), DirstateV2ParseError> {
597 ) -> Result<(), DirstateV2ParseError> {
598 for node in read_nodes(on_disk, nodes)? {
598 for node in read_nodes(on_disk, nodes)? {
599 if let Some(entry) = node.entry()? {
599 if let Some(entry) = node.entry()? {
600 if entry.state().is_tracked() {
600 if entry.state().is_tracked() {
601 f(node.full_path(on_disk)?)
601 f(node.full_path(on_disk)?)
602 }
602 }
603 }
603 }
604 recur(on_disk, node.children, f)?
604 recur(on_disk, node.children, f)?
605 }
605 }
606 Ok(())
606 Ok(())
607 }
607 }
608 recur(on_disk, meta.root_nodes, &mut f)
608 recur(on_disk, meta.root_nodes, &mut f)
609 }
609 }
610
610
611 /// Returns new data and metadata, together with whether that data should be
611 /// Returns new data and metadata, together with whether that data should be
612 /// appended to the existing data file whose content is at
612 /// appended to the existing data file whose content is at
613 /// `dirstate_map.on_disk` (true), instead of written to a new data file
613 /// `dirstate_map.on_disk` (true), instead of written to a new data file
614 /// (false).
614 /// (false).
615 pub(super) fn write(
615 pub(super) fn write(
616 dirstate_map: &mut DirstateMap,
616 dirstate_map: &mut DirstateMap,
617 can_append: bool,
617 can_append: bool,
618 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
618 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
619 let append = can_append && dirstate_map.write_should_append();
619 let append = can_append && dirstate_map.write_should_append();
620
620
621 // This ignores the space for paths, and for nodes without an entry.
621 // This ignores the space for paths, and for nodes without an entry.
622 // TODO: better estimate? Skip the `Vec` and write to a file directly?
622 // TODO: better estimate? Skip the `Vec` and write to a file directly?
623 let size_guess = std::mem::size_of::<Node>()
623 let size_guess = std::mem::size_of::<Node>()
624 * dirstate_map.nodes_with_entry_count as usize;
624 * dirstate_map.nodes_with_entry_count as usize;
625
625
626 let mut writer = Writer {
626 let mut writer = Writer {
627 dirstate_map,
627 dirstate_map,
628 append,
628 append,
629 out: Vec::with_capacity(size_guess),
629 out: Vec::with_capacity(size_guess),
630 };
630 };
631
631
632 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
632 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
633
633
634 let meta = TreeMetadata {
634 let meta = TreeMetadata {
635 root_nodes,
635 root_nodes,
636 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
636 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
637 nodes_with_copy_source_count: dirstate_map
637 nodes_with_copy_source_count: dirstate_map
638 .nodes_with_copy_source_count
638 .nodes_with_copy_source_count
639 .into(),
639 .into(),
640 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
640 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
641 unused: [0; 4],
641 unused: [0; 4],
642 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
642 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
643 };
643 };
644 Ok((writer.out, meta.as_bytes().to_vec(), append))
644 Ok((writer.out, meta.as_bytes().to_vec(), append))
645 }
645 }
646
646
647 struct Writer<'dmap, 'on_disk> {
647 struct Writer<'dmap, 'on_disk> {
648 dirstate_map: &'dmap DirstateMap<'on_disk>,
648 dirstate_map: &'dmap DirstateMap<'on_disk>,
649 append: bool,
649 append: bool,
650 out: Vec<u8>,
650 out: Vec<u8>,
651 }
651 }
652
652
653 impl Writer<'_, '_> {
653 impl Writer<'_, '_> {
654 fn write_nodes(
654 fn write_nodes(
655 &mut self,
655 &mut self,
656 nodes: dirstate_map::ChildNodesRef,
656 nodes: dirstate_map::ChildNodesRef,
657 ) -> Result<ChildNodes, DirstateError> {
657 ) -> Result<ChildNodes, DirstateError> {
658 // Reuse already-written nodes if possible
658 // Reuse already-written nodes if possible
659 if self.append {
659 if self.append {
660 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
660 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
661 let start = self.on_disk_offset_of(nodes_slice).expect(
661 let start = self.on_disk_offset_of(nodes_slice).expect(
662 "dirstate-v2 OnDisk nodes not found within on_disk",
662 "dirstate-v2 OnDisk nodes not found within on_disk",
663 );
663 );
664 let len = child_nodes_len_from_usize(nodes_slice.len());
664 let len = child_nodes_len_from_usize(nodes_slice.len());
665 return Ok(ChildNodes { start, len });
665 return Ok(ChildNodes { start, len });
666 }
666 }
667 }
667 }
668
668
669 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
669 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
670 // undefined iteration order. Sort to enable binary search in the
670 // undefined iteration order. Sort to enable binary search in the
671 // written file.
671 // written file.
672 let nodes = nodes.sorted();
672 let nodes = nodes.sorted();
673 let nodes_len = nodes.len();
673 let nodes_len = nodes.len();
674
674
675 // First accumulate serialized nodes in a `Vec`
675 // First accumulate serialized nodes in a `Vec`
676 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
676 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
677 for node in nodes {
677 for node in nodes {
678 let children =
678 let children =
679 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
679 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
680 let full_path = node.full_path(self.dirstate_map.on_disk)?;
680 let full_path = node.full_path(self.dirstate_map.on_disk)?;
681 let full_path = self.write_path(full_path.as_bytes());
681 let full_path = self.write_path(full_path.as_bytes());
682 let copy_source = if let Some(source) =
682 let copy_source = if let Some(source) =
683 node.copy_source(self.dirstate_map.on_disk)?
683 node.copy_source(self.dirstate_map.on_disk)?
684 {
684 {
685 self.write_path(source.as_bytes())
685 self.write_path(source.as_bytes())
686 } else {
686 } else {
687 PathSlice {
687 PathSlice {
688 start: 0.into(),
688 start: 0.into(),
689 len: 0.into(),
689 len: 0.into(),
690 }
690 }
691 };
691 };
692 on_disk_nodes.push(match node {
692 on_disk_nodes.push(match node {
693 NodeRef::InMemory(path, node) => {
693 NodeRef::InMemory(path, node) => {
694 let (flags, data) = match &node.data {
694 let (flags, data) = match &node.data {
695 dirstate_map::NodeData::Entry(entry) => {
695 dirstate_map::NodeData::Entry(entry) => {
696 Entry::from_dirstate_entry(entry)
696 Entry::from_dirstate_entry(entry)
697 }
697 }
698 dirstate_map::NodeData::CachedDirectory { mtime } => {
698 dirstate_map::NodeData::CachedDirectory { mtime } => {
699 (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
699 (Flags::HAS_MTIME, Entry::from_timestamp(*mtime))
700 }
700 }
701 dirstate_map::NodeData::None => (
701 dirstate_map::NodeData::None => (
702 Flags::empty(),
702 Flags::empty(),
703 Entry {
703 Entry {
704 mode: 0.into(),
704 mode: 0.into(),
705 size: 0.into(),
705 size: 0.into(),
706 mtime: 0.into(),
706 mtime: 0.into(),
707 },
707 },
708 ),
708 ),
709 };
709 };
710 Node {
710 Node {
711 children,
711 children,
712 copy_source,
712 copy_source,
713 full_path,
713 full_path,
714 base_name_start: u16::try_from(path.base_name_start())
714 base_name_start: u16::try_from(path.base_name_start())
715 // Could only panic for paths over 64 KiB
715 // Could only panic for paths over 64 KiB
716 .expect("dirstate-v2 path length overflow")
716 .expect("dirstate-v2 path length overflow")
717 .into(),
717 .into(),
718 descendants_with_entry_count: node
718 descendants_with_entry_count: node
719 .descendants_with_entry_count
719 .descendants_with_entry_count
720 .into(),
720 .into(),
721 tracked_descendants_count: node
721 tracked_descendants_count: node
722 .tracked_descendants_count
722 .tracked_descendants_count
723 .into(),
723 .into(),
724 flags,
724 flags,
725 data,
725 data,
726 }
726 }
727 }
727 }
728 NodeRef::OnDisk(node) => Node {
728 NodeRef::OnDisk(node) => Node {
729 children,
729 children,
730 copy_source,
730 copy_source,
731 full_path,
731 full_path,
732 ..*node
732 ..*node
733 },
733 },
734 })
734 })
735 }
735 }
736 // … so we can write them contiguously, after writing everything else
736 // … so we can write them contiguously, after writing everything else
737 // they refer to.
737 // they refer to.
738 let start = self.current_offset();
738 let start = self.current_offset();
739 let len = child_nodes_len_from_usize(nodes_len);
739 let len = child_nodes_len_from_usize(nodes_len);
740 self.out.extend(on_disk_nodes.as_bytes());
740 self.out.extend(on_disk_nodes.as_bytes());
741 Ok(ChildNodes { start, len })
741 Ok(ChildNodes { start, len })
742 }
742 }
743
743
744 /// If the given slice of items is within `on_disk`, returns its offset
744 /// If the given slice of items is within `on_disk`, returns its offset
745 /// from the start of `on_disk`.
745 /// from the start of `on_disk`.
746 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
746 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
747 where
747 where
748 T: BytesCast,
748 T: BytesCast,
749 {
749 {
750 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
750 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
751 let start = slice.as_ptr() as usize;
751 let start = slice.as_ptr() as usize;
752 let end = start + slice.len();
752 let end = start + slice.len();
753 start..=end
753 start..=end
754 }
754 }
755 let slice_addresses = address_range(slice.as_bytes());
755 let slice_addresses = address_range(slice.as_bytes());
756 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
756 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
757 if on_disk_addresses.contains(slice_addresses.start())
757 if on_disk_addresses.contains(slice_addresses.start())
758 && on_disk_addresses.contains(slice_addresses.end())
758 && on_disk_addresses.contains(slice_addresses.end())
759 {
759 {
760 let offset = slice_addresses.start() - on_disk_addresses.start();
760 let offset = slice_addresses.start() - on_disk_addresses.start();
761 Some(offset_from_usize(offset))
761 Some(offset_from_usize(offset))
762 } else {
762 } else {
763 None
763 None
764 }
764 }
765 }
765 }
766
766
767 fn current_offset(&mut self) -> Offset {
767 fn current_offset(&mut self) -> Offset {
768 let mut offset = self.out.len();
768 let mut offset = self.out.len();
769 if self.append {
769 if self.append {
770 offset += self.dirstate_map.on_disk.len()
770 offset += self.dirstate_map.on_disk.len()
771 }
771 }
772 offset_from_usize(offset)
772 offset_from_usize(offset)
773 }
773 }
774
774
775 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
775 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
776 let len = path_len_from_usize(slice.len());
776 let len = path_len_from_usize(slice.len());
777 // Reuse an already-written path if possible
777 // Reuse an already-written path if possible
778 if self.append {
778 if self.append {
779 if let Some(start) = self.on_disk_offset_of(slice) {
779 if let Some(start) = self.on_disk_offset_of(slice) {
780 return PathSlice { start, len };
780 return PathSlice { start, len };
781 }
781 }
782 }
782 }
783 let start = self.current_offset();
783 let start = self.current_offset();
784 self.out.extend(slice.as_bytes());
784 self.out.extend(slice.as_bytes());
785 PathSlice { start, len }
785 PathSlice { start, len }
786 }
786 }
787 }
787 }
788
788
789 fn offset_from_usize(x: usize) -> Offset {
789 fn offset_from_usize(x: usize) -> Offset {
790 u32::try_from(x)
790 u32::try_from(x)
791 // Could only panic for a dirstate file larger than 4 GiB
791 // Could only panic for a dirstate file larger than 4 GiB
792 .expect("dirstate-v2 offset overflow")
792 .expect("dirstate-v2 offset overflow")
793 .into()
793 .into()
794 }
794 }
795
795
796 fn child_nodes_len_from_usize(x: usize) -> Size {
796 fn child_nodes_len_from_usize(x: usize) -> Size {
797 u32::try_from(x)
797 u32::try_from(x)
798 // Could only panic with over 4 billion nodes
798 // Could only panic with over 4 billion nodes
799 .expect("dirstate-v2 slice length overflow")
799 .expect("dirstate-v2 slice length overflow")
800 .into()
800 .into()
801 }
801 }
802
802
803 fn path_len_from_usize(x: usize) -> PathSize {
803 fn path_len_from_usize(x: usize) -> PathSize {
804 u16::try_from(x)
804 u16::try_from(x)
805 // Could only panic for paths over 64 KiB
805 // Could only panic for paths over 64 KiB
806 .expect("dirstate-v2 path length overflow")
806 .expect("dirstate-v2 path length overflow")
807 .into()
807 .into()
808 }
808 }
General Comments 0
You need to be logged in to leave comments. Login now