dirstate-v2: Remove the `.d` suffix in data file names...
Simon Sapin
r48780:681851d6 default
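The change itself is one line in each of the two implementations below: the docket now names its data file after the UUID alone, without the `.d` extension. A minimal sketch of the effect (illustrative only; the UUID is a made-up value standing in for a real `make_uid()` result):

    # How DirstateDocket.data_filename() derives the data file name.
    uuid = b"a1b2c3d4"                   # hypothetical UUID bytes
    old_name = b'dirstate.%s.d' % uuid   # before: b'dirstate.a1b2c3d4.d'
    new_name = b'dirstate.%s' % uuid     # after:  b'dirstate.a1b2c3d4'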
mercurial/dirstateutils/docket.py
@@ -1,75 +1,75 @@
# dirstatedocket.py - docket file for dirstate-v2
#
# Copyright Mercurial Contributors
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import struct

from ..revlogutils import docket as docket_mod


V2_FORMAT_MARKER = b"dirstate-v2\n"

# Must match the constant of the same name in
# `rust/hg-core/src/dirstate_tree/on_disk.rs`
TREE_METADATA_SIZE = 44

# * 12 bytes: format marker
# * 32 bytes: node ID of the working directory's first parent
# * 32 bytes: node ID of the working directory's second parent
# * 4 bytes: big-endian used size of the data file
# * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
# * 1 byte: length of the data file's UUID
# * variable: data file's UUID
#
# Node IDs are null-padded if shorter than 32 bytes.
# A data file shorter than the specified used size is corrupted (truncated)
HEADER = struct.Struct(
    ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
)


class DirstateDocket(object):
-    data_filename_pattern = b'dirstate.%s.d'
+    data_filename_pattern = b'dirstate.%s'

    def __init__(self, parents, data_size, tree_metadata, uuid):
        self.parents = parents
        self.data_size = data_size
        self.tree_metadata = tree_metadata
        self.uuid = uuid

    @classmethod
    def with_new_uuid(cls, parents, data_size, tree_metadata):
        return cls(parents, data_size, tree_metadata, docket_mod.make_uid())

    @classmethod
    def parse(cls, data, nodeconstants):
        if not data:
            parents = (nodeconstants.nullid, nodeconstants.nullid)
            return cls(parents, 0, b'', None)
        marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(data)
        if marker != V2_FORMAT_MARKER:
            raise ValueError("expected dirstate-v2 marker")
        uuid = data[HEADER.size : HEADER.size + uuid_size]
        p1 = p1[: nodeconstants.nodelen]
        p2 = p2[: nodeconstants.nodelen]
        return cls((p1, p2), data_size, meta, uuid)

    def serialize(self):
        p1, p2 = self.parents
        header = HEADER.pack(
            V2_FORMAT_MARKER,
            p1,
            p2,
            self.data_size,
            self.tree_metadata,
            len(self.uuid),
        )
        return header + self.uuid

    def data_filename(self):
        return self.data_filename_pattern % self.uuid
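For reference (not part of the commit), a self-contained sketch of how the `HEADER` struct above lays out the docket bytes; every value here is made up for illustration, and the UUID is fixed instead of coming from `docket_mod.make_uid()`:

    import struct

    V2_FORMAT_MARKER = b"dirstate-v2\n"
    TREE_METADATA_SIZE = 44
    HEADER = struct.Struct(
        ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
    )

    uuid = b"0f3a9b8c"  # hypothetical; normally generated by make_uid()
    docket = HEADER.pack(
        V2_FORMAT_MARKER,
        b"\x11" * 20 + b"\x00" * 12,   # p1: 20-byte SHA-1, null-padded to 32
        b"\x00" * 32,                  # p2: null node ID
        1024,                          # used size of the data file, in bytes
        b"\x00" * TREE_METADATA_SIZE,  # tree metadata, parsed separately
        len(uuid),
    ) + uuid

    marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(docket)
    assert marker == V2_FORMAT_MARKER
    assert docket[HEADER.size : HEADER.size + uuid_size] == uuid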
rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -1,760 +1,760 @@
//! The "version 2" disk representation of the dirstate
//!
//! # File format
//!
//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
//! with a fixed-size header whose layout is defined by the `DocketHeader`
//! struct, followed by the data file identifier.
//!
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
//! file may be longer than the size given in the docket, but not shorter. Only
//! the start of the data file up to the given size is considered. The
//! fixed-size "root" of the dirstate tree whose layout is defined by the
//! `Root` struct is found at the end of that slice of data.
//!
//! Its `root_nodes` field contains the slice (offset and length) to
//! the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own child
//! nodes (if any) for nested files and directories.

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
use crate::EntryState;
use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match the constant of the same name in
/// `mercurial/dirstateutils/docket.py`
const TREE_METADATA_SIZE: usize = 44;

/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; 43]>;
}

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Counted in bytes
    data_size: Size,

    metadata: TreeMetadata,

    uuid_size: u8,
}

pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    uuid: &'on_disk [u8],
}

#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,

    /// How many bytes of this data file are not used anymore
    unreachable_bytes: Size,

    /// Current version always sets these bytes to zero when creating or
    /// updating a dirstate. Future versions could assign some bits to signal
    /// for example "the version that last wrote/updated this dirstate did so
    /// in such and such way that can be relied on by versions that know to."
    unused: [u8; 4],

    /// If non-zero, a hash of ignore files that were used for some previous
    /// run of the `status` algorithm.
    ///
    /// We define:
    ///
    /// * "Root" ignore files are `.hgignore` at the root of the repository if
    ///   it exists, and files from `ui.ignore.*` config. This set of files is
    ///   then sorted by the string representation of their path.
    /// * The "expanded contents" of an ignore file is the byte string made
    ///   by concatenating its contents with the "expanded contents" of other
    ///   files included with `include:` or `subinclude:` directives, in
    ///   inclusion order. This definition is recursive, as included files can
    ///   themselves include more files.
    ///
    /// This hash is defined as the SHA-1 of the concatenation (in sorted
    /// order) of the "expanded contents" of each "root" ignore file.
    /// (Note that computing this does not require actually concatenating byte
    /// strings into contiguous memory, instead SHA-1 hashing can be done
    /// incrementally.)
    ignore_patterns_hash: IgnorePatternsHash,
}

#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,

    /// Depending on the value of `state`:
    ///
    /// * A null byte: `data` is not used.
    ///
    /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
    ///   represent a dirstate entry like in the v1 format.
    ///
    /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
    ///   as the `Timestamp` for the mtime of a cached directory.
    ///
    ///   The presence of this state means that at some point, this path in
    ///   the working directory was observed:
    ///
    ///   - To be a directory
    ///   - With the modification time as given by `Timestamp`
    ///   - That timestamp was already strictly in the past when observed,
    ///     meaning that later changes cannot happen in the same clock tick
    ///     and must cause a different modification time (unless the system
    ///     clock jumps back and we get unlucky, which is not impossible but
    ///     deemed unlikely enough).
    ///   - All direct children of this directory (as returned by
    ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
    ///     are ignored by ignore patterns whose hash is in
    ///     `TreeMetadata::ignore_patterns_hash`.
    ///
    ///   This means that if `std::fs::symlink_metadata` later reports the
    ///   same modification time and ignored patterns haven’t changed, a run
    ///   of status that is not listing ignored files can skip calling
    ///   `std::fs::read_dir` again for this directory, and iterate child
    ///   dirstate nodes instead.
    state: u8,
    data: Entry,
}

#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct Entry {
    mode: I32Be,
    mtime: I32Be,
    size: I32Be,
}

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, PartialEq)]
#[repr(C)]
pub(super) struct Timestamp {
    seconds: I64Be,

    /// In `0 .. 1_000_000_000`.
    ///
    /// This timestamp is later or earlier than `(seconds, 0)` by this many
    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

impl From<DirstateV2ParseError> for HgError {
    fn from(_: DirstateV2ParseError) -> Self {
        HgError::corrupted("dirstate-v2 parse error")
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl<'on_disk> Docket<'on_disk> {
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
-        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
+        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}

pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) =
        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError)
    }
}

pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        return Ok(DirstateMap::empty(on_disk));
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(read_nodes(
            on_disk,
            meta.root_nodes,
        )?),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u32 -> usize, could only panic on a 16-bit CPU
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError)
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        let entry = |state| {
            dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
        };

        match self.state {
            b'\0' => Ok(dirstate_map::NodeData::None),
            b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
                mtime: *self.data.as_timestamp(),
            }),
            b'n' => Ok(entry(EntryState::Normal)),
            b'a' => Ok(entry(EntryState::Added)),
            b'r' => Ok(entry(EntryState::Removed)),
            b'm' => Ok(entry(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
        if self.state == b'd' {
            Some(self.data.as_timestamp())
        } else {
            None
        }
    }

    pub(super) fn state(
        &self,
    ) -> Result<Option<EntryState>, DirstateV2ParseError> {
        match self.state {
            b'\0' | b'd' => Ok(None),
            b'n' => Ok(Some(EntryState::Normal)),
            b'a' => Ok(Some(EntryState::Added)),
            b'r' => Ok(Some(EntryState::Removed)),
            b'm' => Ok(Some(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
        DirstateEntry {
            state,
            mode: self.data.mode.get(),
            mtime: self.data.mtime.get(),
            size: self.data.size.get(),
        }
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        Ok(self
            .state()?
            .map(|state| self.entry_with_given_state(state)))
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }
}

impl Entry {
    fn from_timestamp(timestamp: Timestamp) -> Self {
        // Safety: both types implement the `BytesCast` trait, so we could
        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
        // `transmute` instead makes the compiler check that the two types
        // have the same size, which eliminates the error case of
        // `from_bytes`.
        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
    }

    fn as_timestamp(&self) -> &Timestamp {
        // Safety: same as above in `from_timestamp`
        unsafe { &*(self as *const Entry as *const Timestamp) }
    }
}

impl Timestamp {
    pub fn seconds(&self) -> i64 {
        self.seconds.get()
    }
}

impl From<SystemTime> for Timestamp {
    fn from(system_time: SystemTime) -> Self {
        let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
            Ok(duration) => {
                (duration.as_secs() as i64, duration.subsec_nanos())
            }
            Err(error) => {
                let negative = error.duration();
                (-(negative.as_secs() as i64), negative.subsec_nanos())
            }
        };
        Timestamp {
            seconds: secs.into(),
            nanoseconds: nanos.into(),
        }
    }
}

impl From<&'_ Timestamp> for SystemTime {
    fn from(timestamp: &'_ Timestamp) -> Self {
        let secs = timestamp.seconds.get();
        let nanos = timestamp.nanoseconds.get();
        if secs >= 0 {
            UNIX_EPOCH + Duration::new(secs as u64, nanos)
        } else {
            UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
        }
    }
}

fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either `usize::MAX` would result in an "out of bounds" error, since a
    // single `&[u8]` cannot occupy the entire address space.
    let start = start.get().try_into().unwrap_or(std::usize::MAX);
    let len = len.try_into().unwrap_or(std::usize::MAX);
    on_disk
        .get(start..)
        .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
        .map(|(slice, _rest)| slice)
        .ok_or_else(|| DirstateV2ParseError)
}

pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(state) = node.state()? {
                if state.is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}

/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: dirstate_map.unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta.as_bytes().to_vec(), append))
}

struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (state, data) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => (
                            entry.state.into(),
                            Entry {
                                mode: entry.mode.into(),
                                mtime: entry.mtime.into(),
                                size: entry.size.into(),
                            },
                        ),
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (b'd', Entry::from_timestamp(*mtime))
                        }
                        dirstate_map::NodeData::None => (
                            b'\0',
                            Entry {
                                mode: 0.into(),
                                mtime: 0.into(),
                                size: 0.into(),
                            },
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        state,
                        data,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}
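A note on the `Timestamp` sign convention above, since it is easy to invert: per the struct's documentation and the `From<&'_ Timestamp> for SystemTime` impl, the nanoseconds always move the instant away from `(seconds, 0)`, towards the future when `seconds` is non-negative and towards the past otherwise. A hedged Python rendering of the same rule, for illustration only (not a Mercurial API):

    def timestamp_to_epoch_seconds(seconds, nanoseconds):
        # Mirrors the Rust SystemTime conversion; requires
        # 0 <= nanoseconds < 1_000_000_000.
        fraction = nanoseconds / 1_000_000_000
        return seconds + fraction if seconds >= 0 else seconds - fraction

    assert timestamp_to_epoch_seconds(2, 500_000_000) == 2.5    # after the epoch
    assert timestamp_to_epoch_seconds(-2, 500_000_000) == -2.5  # before the epoch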