Show More
@@ -1,656 +1,678 | |||||
1 | //! The "version 2" disk representation of the dirstate |
|
1 | //! The "version 2" disk representation of the dirstate | |
2 | //! |
|
2 | //! | |
3 | //! # File format |
|
3 | //! # File format | |
4 | //! |
|
4 | //! | |
5 | //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts |
|
5 | //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts | |
6 | //! with a fixed-sized header whose layout is defined by the `DocketHeader` |
|
6 | //! with a fixed-sized header whose layout is defined by the `DocketHeader` | |
7 | //! struct, followed by the data file identifier. |
|
7 | //! struct, followed by the data file identifier. | |
8 | //! |
|
8 | //! | |
9 | //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That |
|
9 | //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That | |
10 | //! file may be longer than the size given in the docket, but not shorter. Only |
|
10 | //! file may be longer than the size given in the docket, but not shorter. Only | |
11 | //! the start of the data file up to the given size is considered. The |
|
11 | //! the start of the data file up to the given size is considered. The | |
12 | //! fixed-size "root" of the dirstate tree whose layout is defined by the |
|
12 | //! fixed-size "root" of the dirstate tree whose layout is defined by the | |
13 | //! `Root` struct is found at the end of that slice of data. |
|
13 | //! `Root` struct is found at the end of that slice of data. | |
14 | //! |
|
14 | //! | |
15 | //! Its `root_nodes` field contains the slice (offset and length) to |
|
15 | //! Its `root_nodes` field contains the slice (offset and length) to | |
16 | //! the nodes representing the files and directories at the root of the |
|
16 | //! the nodes representing the files and directories at the root of the | |
17 | //! repository. Each node is also fixed-size, defined by the `Node` struct. |
|
17 | //! repository. Each node is also fixed-size, defined by the `Node` struct. | |
18 | //! Nodes in turn contain slices to variable-size paths, and to their own child |
|
18 | //! Nodes in turn contain slices to variable-size paths, and to their own child | |
19 | //! nodes (if any) for nested files and directories. |
|
19 | //! nodes (if any) for nested files and directories. | |
20 |
|
20 | |||
21 | use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; |
|
21 | use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; | |
22 | use crate::dirstate_tree::path_with_basename::WithBasename; |
|
22 | use crate::dirstate_tree::path_with_basename::WithBasename; | |
23 | use crate::errors::HgError; |
|
23 | use crate::errors::HgError; | |
24 | use crate::utils::hg_path::HgPath; |
|
24 | use crate::utils::hg_path::HgPath; | |
25 | use crate::DirstateEntry; |
|
25 | use crate::DirstateEntry; | |
26 | use crate::DirstateError; |
|
26 | use crate::DirstateError; | |
27 | use crate::DirstateParents; |
|
27 | use crate::DirstateParents; | |
28 | use crate::EntryState; |
|
28 | use crate::EntryState; | |
29 | use bytes_cast::unaligned::{I32Be, I64Be, U32Be}; |
|
29 | use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be}; | |
30 | use bytes_cast::BytesCast; |
|
30 | use bytes_cast::BytesCast; | |
31 | use format_bytes::format_bytes; |
|
31 | use format_bytes::format_bytes; | |
32 | use std::borrow::Cow; |
|
32 | use std::borrow::Cow; | |
33 | use std::convert::{TryFrom, TryInto}; |
|
33 | use std::convert::{TryFrom, TryInto}; | |
34 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; |
|
34 | use std::time::{Duration, SystemTime, UNIX_EPOCH}; | |
35 |
|
35 | |||
36 | /// Added at the start of `.hg/dirstate` when the "v2" format is used. |
|
36 | /// Added at the start of `.hg/dirstate` when the "v2" format is used. | |
37 | /// This a redundant sanity check more than an actual "magic number" since |
|
37 | /// This a redundant sanity check more than an actual "magic number" since | |
38 | /// `.hg/requires` already governs which format should be used. |
|
38 | /// `.hg/requires` already governs which format should be used. | |
39 | pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; |
|
39 | pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; | |
40 |
|
40 | |||
41 | /// Keep space for 256-bit hashes |
|
41 | /// Keep space for 256-bit hashes | |
42 | const STORED_NODE_ID_BYTES: usize = 32; |
|
42 | const STORED_NODE_ID_BYTES: usize = 32; | |
43 |
|
43 | |||
44 | /// β¦ even though only 160 bits are used for now, with SHA-1 |
|
44 | /// β¦ even though only 160 bits are used for now, with SHA-1 | |
45 | const USED_NODE_ID_BYTES: usize = 20; |
|
45 | const USED_NODE_ID_BYTES: usize = 20; | |
46 |
|
46 | |||
47 | pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; |
|
47 | pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; | |
48 | pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; |
|
48 | pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; | |
49 |
|
49 | |||
50 | // Must match `HEADER` in `mercurial/dirstateutils/docket.py` |
|
50 | // Must match `HEADER` in `mercurial/dirstateutils/docket.py` | |
51 | #[derive(BytesCast)] |
|
51 | #[derive(BytesCast)] | |
52 | #[repr(C)] |
|
52 | #[repr(C)] | |
53 | struct DocketHeader { |
|
53 | struct DocketHeader { | |
54 | marker: [u8; V2_FORMAT_MARKER.len()], |
|
54 | marker: [u8; V2_FORMAT_MARKER.len()], | |
55 | parent_1: [u8; STORED_NODE_ID_BYTES], |
|
55 | parent_1: [u8; STORED_NODE_ID_BYTES], | |
56 | parent_2: [u8; STORED_NODE_ID_BYTES], |
|
56 | parent_2: [u8; STORED_NODE_ID_BYTES], | |
|
57 | ||||
|
58 | /// Counted in bytes | |||
57 | data_size: Size, |
|
59 | data_size: Size, | |
|
60 | ||||
58 | uuid_size: u8, |
|
61 | uuid_size: u8, | |
59 | } |
|
62 | } | |
60 |
|
63 | |||
61 | pub struct Docket<'on_disk> { |
|
64 | pub struct Docket<'on_disk> { | |
62 | header: &'on_disk DocketHeader, |
|
65 | header: &'on_disk DocketHeader, | |
63 | uuid: &'on_disk [u8], |
|
66 | uuid: &'on_disk [u8], | |
64 | } |
|
67 | } | |
65 |
|
68 | |||
66 | #[derive(BytesCast)] |
|
69 | #[derive(BytesCast)] | |
67 | #[repr(C)] |
|
70 | #[repr(C)] | |
68 | struct Root { |
|
71 | struct Root { | |
69 | root_nodes: ChildNodes, |
|
72 | root_nodes: ChildNodes, | |
70 | nodes_with_entry_count: Size, |
|
73 | nodes_with_entry_count: Size, | |
71 | nodes_with_copy_source_count: Size, |
|
74 | nodes_with_copy_source_count: Size, | |
72 |
|
75 | |||
73 | /// If non-zero, a hash of ignore files that were used for some previous |
|
76 | /// If non-zero, a hash of ignore files that were used for some previous | |
74 | /// run of the `status` algorithm. |
|
77 | /// run of the `status` algorithm. | |
75 | /// |
|
78 | /// | |
76 | /// We define: |
|
79 | /// We define: | |
77 | /// |
|
80 | /// | |
78 | /// * "Root" ignore files are `.hgignore` at the root of the repository if |
|
81 | /// * "Root" ignore files are `.hgignore` at the root of the repository if | |
79 | /// it exists, and files from `ui.ignore.*` config. This set of files is |
|
82 | /// it exists, and files from `ui.ignore.*` config. This set of files is | |
80 | /// then sorted by the string representation of their path. |
|
83 | /// then sorted by the string representation of their path. | |
81 | /// * The "expanded contents" of an ignore files is the byte string made |
|
84 | /// * The "expanded contents" of an ignore files is the byte string made | |
82 | /// by concatenating its contents with the "expanded contents" of other |
|
85 | /// by concatenating its contents with the "expanded contents" of other | |
83 | /// files included with `include:` or `subinclude:` files, in inclusion |
|
86 | /// files included with `include:` or `subinclude:` files, in inclusion | |
84 | /// order. This definition is recursive, as included files can |
|
87 | /// order. This definition is recursive, as included files can | |
85 | /// themselves include more files. |
|
88 | /// themselves include more files. | |
86 | /// |
|
89 | /// | |
87 | /// This hash is defined as the SHA-1 of the concatenation (in sorted |
|
90 | /// This hash is defined as the SHA-1 of the concatenation (in sorted | |
88 | /// order) of the "expanded contents" of each "root" ignore file. |
|
91 | /// order) of the "expanded contents" of each "root" ignore file. | |
89 | /// (Note that computing this does not require actually concatenating byte |
|
92 | /// (Note that computing this does not require actually concatenating byte | |
90 | /// strings into contiguous memory, instead SHA-1 hashing can be done |
|
93 | /// strings into contiguous memory, instead SHA-1 hashing can be done | |
91 | /// incrementally.) |
|
94 | /// incrementally.) | |
92 | ignore_patterns_hash: IgnorePatternsHash, |
|
95 | ignore_patterns_hash: IgnorePatternsHash, | |
93 | } |
|
96 | } | |
94 |
|
97 | |||
95 | #[derive(BytesCast)] |
|
98 | #[derive(BytesCast)] | |
96 | #[repr(C)] |
|
99 | #[repr(C)] | |
97 | pub(super) struct Node { |
|
100 | pub(super) struct Node { | |
98 | full_path: PathSlice, |
|
101 | full_path: PathSlice, | |
99 |
|
102 | |||
100 | /// In bytes from `self.full_path.start` |
|
103 | /// In bytes from `self.full_path.start` | |
101 | base_name_start: Size, |
|
104 | base_name_start: PathSize, | |
102 |
|
105 | |||
103 | copy_source: OptPathSlice, |
|
106 | copy_source: OptPathSlice, | |
104 | children: ChildNodes, |
|
107 | children: ChildNodes, | |
105 | pub(super) descendants_with_entry_count: Size, |
|
108 | pub(super) descendants_with_entry_count: Size, | |
106 | pub(super) tracked_descendants_count: Size, |
|
109 | pub(super) tracked_descendants_count: Size, | |
107 |
|
110 | |||
108 | /// Depending on the value of `state`: |
|
111 | /// Depending on the value of `state`: | |
109 | /// |
|
112 | /// | |
110 | /// * A null byte: `data` is not used. |
|
113 | /// * A null byte: `data` is not used. | |
111 | /// |
|
114 | /// | |
112 | /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together |
|
115 | /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together | |
113 | /// represent a dirstate entry like in the v1 format. |
|
116 | /// represent a dirstate entry like in the v1 format. | |
114 | /// |
|
117 | /// | |
115 | /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted |
|
118 | /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted | |
116 | /// as the `Timestamp` for the mtime of a cached directory. |
|
119 | /// as the `Timestamp` for the mtime of a cached directory. | |
117 | /// |
|
120 | /// | |
118 | /// The presence of this state means that at some point, this path in |
|
121 | /// The presence of this state means that at some point, this path in | |
119 | /// the working directory was observed: |
|
122 | /// the working directory was observed: | |
120 | /// |
|
123 | /// | |
121 | /// - To be a directory |
|
124 | /// - To be a directory | |
122 | /// - With the modification time as given by `Timestamp` |
|
125 | /// - With the modification time as given by `Timestamp` | |
123 | /// - That timestamp was already strictly in the past when observed, |
|
126 | /// - That timestamp was already strictly in the past when observed, | |
124 | /// meaning that later changes cannot happen in the same clock tick |
|
127 | /// meaning that later changes cannot happen in the same clock tick | |
125 | /// and must cause a different modification time (unless the system |
|
128 | /// and must cause a different modification time (unless the system | |
126 | /// clock jumps back and we get unlucky, which is not impossible but |
|
129 | /// clock jumps back and we get unlucky, which is not impossible but | |
127 | /// but deemed unlikely enough). |
|
130 | /// but deemed unlikely enough). | |
128 | /// - All direct children of this directory (as returned by |
|
131 | /// - All direct children of this directory (as returned by | |
129 | /// `std::fs::read_dir`) either have a corresponding dirstate node, or |
|
132 | /// `std::fs::read_dir`) either have a corresponding dirstate node, or | |
130 | /// are ignored by ignore patterns whose hash is in |
|
133 | /// are ignored by ignore patterns whose hash is in | |
131 | /// `Root::ignore_patterns_hash`. |
|
134 | /// `Root::ignore_patterns_hash`. | |
132 | /// |
|
135 | /// | |
133 | /// This means that if `std::fs::symlink_metadata` later reports the |
|
136 | /// This means that if `std::fs::symlink_metadata` later reports the | |
134 | /// same modification time and ignored patterns havenβt changed, a run |
|
137 | /// same modification time and ignored patterns havenβt changed, a run | |
135 | /// of status that is not listing ignored files can skip calling |
|
138 | /// of status that is not listing ignored files can skip calling | |
136 | /// `std::fs::read_dir` again for this directory, iterate child |
|
139 | /// `std::fs::read_dir` again for this directory, iterate child | |
137 | /// dirstate nodes instead. |
|
140 | /// dirstate nodes instead. | |
138 | state: u8, |
|
141 | state: u8, | |
139 | data: Entry, |
|
142 | data: Entry, | |
140 | } |
|
143 | } | |
141 |
|
144 | |||
142 | #[derive(BytesCast, Copy, Clone)] |
|
145 | #[derive(BytesCast, Copy, Clone)] | |
143 | #[repr(C)] |
|
146 | #[repr(C)] | |
144 | struct Entry { |
|
147 | struct Entry { | |
145 | mode: I32Be, |
|
148 | mode: I32Be, | |
146 | mtime: I32Be, |
|
149 | mtime: I32Be, | |
147 | size: I32Be, |
|
150 | size: I32Be, | |
148 | } |
|
151 | } | |
149 |
|
152 | |||
150 | /// Duration since the Unix epoch |
|
153 | /// Duration since the Unix epoch | |
151 | #[derive(BytesCast, Copy, Clone, PartialEq)] |
|
154 | #[derive(BytesCast, Copy, Clone, PartialEq)] | |
152 | #[repr(C)] |
|
155 | #[repr(C)] | |
153 | pub(super) struct Timestamp { |
|
156 | pub(super) struct Timestamp { | |
154 | seconds: I64Be, |
|
157 | seconds: I64Be, | |
155 |
|
158 | |||
156 | /// In `0 .. 1_000_000_000`. |
|
159 | /// In `0 .. 1_000_000_000`. | |
157 | /// |
|
160 | /// | |
158 | /// This timestamp is later or earlier than `(seconds, 0)` by this many |
|
161 | /// This timestamp is later or earlier than `(seconds, 0)` by this many | |
159 | /// nanoseconds, if `seconds` is non-negative or negative, respectively. |
|
162 | /// nanoseconds, if `seconds` is non-negative or negative, respectively. | |
160 | nanoseconds: U32Be, |
|
163 | nanoseconds: U32Be, | |
161 | } |
|
164 | } | |
162 |
|
165 | |||
163 | /// Counted in bytes from the start of the file |
|
166 | /// Counted in bytes from the start of the file | |
164 | /// |
|
167 | /// | |
165 | /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. |
|
168 | /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. | |
166 | type Offset = U32Be; |
|
169 | type Offset = U32Be; | |
167 |
|
170 | |||
168 | /// Counted in number of items |
|
171 | /// Counted in number of items | |
169 | /// |
|
172 | /// | |
170 |
/// NOTE: not support |
|
173 | /// NOTE: we choose not to support counting more than 4 billion nodes anywhere. | |
171 | /// or filenames more than 4 GiB. |
|
|||
172 | type Size = U32Be; |
|
174 | type Size = U32Be; | |
173 |
|
175 | |||
174 | /// Location of consecutive, fixed-size items. |
|
176 | /// Counted in bytes | |
175 | /// |
|
177 | /// | |
176 | /// An item can be a single byte for paths, or a struct with |
|
178 | /// NOTE: we choose not to support file names/paths longer than 64 KiB. | |
177 | /// `derive(BytesCast)`. |
|
179 | type PathSize = U16Be; | |
178 | #[derive(BytesCast, Copy, Clone)] |
|
|||
179 | #[repr(C)] |
|
|||
180 | struct Slice { |
|
|||
181 | start: Offset, |
|
|||
182 | len: Size, |
|
|||
183 | } |
|
|||
184 |
|
180 | |||
185 | /// A contiguous sequence of `len` times `Node`, representing the child nodes |
|
181 | /// A contiguous sequence of `len` times `Node`, representing the child nodes | |
186 | /// of either some other node or of the repository root. |
|
182 | /// of either some other node or of the repository root. | |
187 | /// |
|
183 | /// | |
188 | /// Always sorted by ascending `full_path`, to allow binary search. |
|
184 | /// Always sorted by ascending `full_path`, to allow binary search. | |
189 | /// Since nodes with the same parent nodes also have the same parent path, |
|
185 | /// Since nodes with the same parent nodes also have the same parent path, | |
190 | /// only the `base_name`s need to be compared during binary search. |
|
186 | /// only the `base_name`s need to be compared during binary search. | |
191 | type ChildNodes = Slice; |
|
187 | #[derive(BytesCast, Copy, Clone)] | |
|
188 | #[repr(C)] | |||
|
189 | struct ChildNodes { | |||
|
190 | start: Offset, | |||
|
191 | len: Size, | |||
|
192 | } | |||
192 |
|
193 | |||
193 | /// A `HgPath` of `len` bytes |
|
194 | /// A `HgPath` of `len` bytes | |
194 | type PathSlice = Slice; |
|
195 | #[derive(BytesCast, Copy, Clone)] | |
|
196 | #[repr(C)] | |||
|
197 | struct PathSlice { | |||
|
198 | start: Offset, | |||
|
199 | len: PathSize, | |||
|
200 | } | |||
195 |
|
201 | |||
196 | /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes |
|
202 | /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes | |
197 | type OptPathSlice = Slice; |
|
203 | type OptPathSlice = PathSlice; | |
198 |
|
204 | |||
199 | /// Make sure that size-affecting changes are made knowingly |
|
205 | /// Make sure that size-affecting changes are made knowingly | |
200 | fn _static_assert_size_of() { |
|
206 | fn _static_assert_size_of() { | |
201 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
|
207 | let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; | |
202 | let _ = std::mem::transmute::<Root, [u8; 36]>; |
|
208 | let _ = std::mem::transmute::<Root, [u8; 36]>; | |
203 |
let _ = std::mem::transmute::<Node, [u8; 4 |
|
209 | let _ = std::mem::transmute::<Node, [u8; 43]>; | |
204 | } |
|
210 | } | |
205 |
|
211 | |||
206 | /// Unexpected file format found in `.hg/dirstate` with the "v2" format. |
|
212 | /// Unexpected file format found in `.hg/dirstate` with the "v2" format. | |
207 | /// |
|
213 | /// | |
208 | /// This should only happen if Mercurial is buggy or a repository is corrupted. |
|
214 | /// This should only happen if Mercurial is buggy or a repository is corrupted. | |
209 | #[derive(Debug)] |
|
215 | #[derive(Debug)] | |
210 | pub struct DirstateV2ParseError; |
|
216 | pub struct DirstateV2ParseError; | |
211 |
|
217 | |||
212 | impl From<DirstateV2ParseError> for HgError { |
|
218 | impl From<DirstateV2ParseError> for HgError { | |
213 | fn from(_: DirstateV2ParseError) -> Self { |
|
219 | fn from(_: DirstateV2ParseError) -> Self { | |
214 | HgError::corrupted("dirstate-v2 parse error") |
|
220 | HgError::corrupted("dirstate-v2 parse error") | |
215 | } |
|
221 | } | |
216 | } |
|
222 | } | |
217 |
|
223 | |||
218 | impl From<DirstateV2ParseError> for crate::DirstateError { |
|
224 | impl From<DirstateV2ParseError> for crate::DirstateError { | |
219 | fn from(error: DirstateV2ParseError) -> Self { |
|
225 | fn from(error: DirstateV2ParseError) -> Self { | |
220 | HgError::from(error).into() |
|
226 | HgError::from(error).into() | |
221 | } |
|
227 | } | |
222 | } |
|
228 | } | |
223 |
|
229 | |||
224 | impl<'on_disk> Docket<'on_disk> { |
|
230 | impl<'on_disk> Docket<'on_disk> { | |
225 | pub fn parents(&self) -> DirstateParents { |
|
231 | pub fn parents(&self) -> DirstateParents { | |
226 | use crate::Node; |
|
232 | use crate::Node; | |
227 | let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) |
|
233 | let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) | |
228 | .unwrap() |
|
234 | .unwrap() | |
229 | .clone(); |
|
235 | .clone(); | |
230 | let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) |
|
236 | let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) | |
231 | .unwrap() |
|
237 | .unwrap() | |
232 | .clone(); |
|
238 | .clone(); | |
233 | DirstateParents { p1, p2 } |
|
239 | DirstateParents { p1, p2 } | |
234 | } |
|
240 | } | |
235 |
|
241 | |||
236 | pub fn data_size(&self) -> usize { |
|
242 | pub fn data_size(&self) -> usize { | |
237 | // This `unwrap` could only panic on a 16-bit CPU |
|
243 | // This `unwrap` could only panic on a 16-bit CPU | |
238 | self.header.data_size.get().try_into().unwrap() |
|
244 | self.header.data_size.get().try_into().unwrap() | |
239 | } |
|
245 | } | |
240 |
|
246 | |||
241 | pub fn data_filename(&self) -> String { |
|
247 | pub fn data_filename(&self) -> String { | |
242 | String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap() |
|
248 | String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap() | |
243 | } |
|
249 | } | |
244 | } |
|
250 | } | |
245 |
|
251 | |||
246 | pub fn read_docket( |
|
252 | pub fn read_docket( | |
247 | on_disk: &[u8], |
|
253 | on_disk: &[u8], | |
248 | ) -> Result<Docket<'_>, DirstateV2ParseError> { |
|
254 | ) -> Result<Docket<'_>, DirstateV2ParseError> { | |
249 | let (header, uuid) = |
|
255 | let (header, uuid) = | |
250 | DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; |
|
256 | DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?; | |
251 | let uuid_size = header.uuid_size as usize; |
|
257 | let uuid_size = header.uuid_size as usize; | |
252 | if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { |
|
258 | if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { | |
253 | Ok(Docket { header, uuid }) |
|
259 | Ok(Docket { header, uuid }) | |
254 | } else { |
|
260 | } else { | |
255 | Err(DirstateV2ParseError) |
|
261 | Err(DirstateV2ParseError) | |
256 | } |
|
262 | } | |
257 | } |
|
263 | } | |
258 |
|
264 | |||
259 | fn read_root<'on_disk>( |
|
265 | fn read_root<'on_disk>( | |
260 | on_disk: &'on_disk [u8], |
|
266 | on_disk: &'on_disk [u8], | |
261 | ) -> Result<&'on_disk Root, DirstateV2ParseError> { |
|
267 | ) -> Result<&'on_disk Root, DirstateV2ParseError> { | |
262 | // Find the `Root` at the end of the given slice |
|
268 | // Find the `Root` at the end of the given slice | |
263 | let root_offset = on_disk |
|
269 | let root_offset = on_disk | |
264 | .len() |
|
270 | .len() | |
265 | .checked_sub(std::mem::size_of::<Root>()) |
|
271 | .checked_sub(std::mem::size_of::<Root>()) | |
266 | // A non-empty slice too short is an error |
|
272 | // A non-empty slice too short is an error | |
267 | .ok_or(DirstateV2ParseError)?; |
|
273 | .ok_or(DirstateV2ParseError)?; | |
268 | let (root, _) = Root::from_bytes(&on_disk[root_offset..]) |
|
274 | let (root, _) = Root::from_bytes(&on_disk[root_offset..]) | |
269 | .map_err(|_| DirstateV2ParseError)?; |
|
275 | .map_err(|_| DirstateV2ParseError)?; | |
270 | Ok(root) |
|
276 | Ok(root) | |
271 | } |
|
277 | } | |
272 |
|
278 | |||
273 | pub(super) fn read<'on_disk>( |
|
279 | pub(super) fn read<'on_disk>( | |
274 | on_disk: &'on_disk [u8], |
|
280 | on_disk: &'on_disk [u8], | |
275 | ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { |
|
281 | ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { | |
276 | if on_disk.is_empty() { |
|
282 | if on_disk.is_empty() { | |
277 | return Ok(DirstateMap::empty(on_disk)); |
|
283 | return Ok(DirstateMap::empty(on_disk)); | |
278 | } |
|
284 | } | |
279 | let root = read_root(on_disk)?; |
|
285 | let root = read_root(on_disk)?; | |
280 | let dirstate_map = DirstateMap { |
|
286 | let dirstate_map = DirstateMap { | |
281 | on_disk, |
|
287 | on_disk, | |
282 |
root: dirstate_map::ChildNodes::OnDisk(read_ |
|
288 | root: dirstate_map::ChildNodes::OnDisk(read_nodes( | |
283 | on_disk, |
|
289 | on_disk, | |
284 | root.root_nodes, |
|
290 | root.root_nodes, | |
285 | )?), |
|
291 | )?), | |
286 | nodes_with_entry_count: root.nodes_with_entry_count.get(), |
|
292 | nodes_with_entry_count: root.nodes_with_entry_count.get(), | |
287 | nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), |
|
293 | nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), | |
288 | ignore_patterns_hash: root.ignore_patterns_hash, |
|
294 | ignore_patterns_hash: root.ignore_patterns_hash, | |
289 | }; |
|
295 | }; | |
290 | Ok(dirstate_map) |
|
296 | Ok(dirstate_map) | |
291 | } |
|
297 | } | |
292 |
|
298 | |||
293 | impl Node { |
|
299 | impl Node { | |
294 | pub(super) fn full_path<'on_disk>( |
|
300 | pub(super) fn full_path<'on_disk>( | |
295 | &self, |
|
301 | &self, | |
296 | on_disk: &'on_disk [u8], |
|
302 | on_disk: &'on_disk [u8], | |
297 | ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { |
|
303 | ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { | |
298 | read_hg_path(on_disk, self.full_path) |
|
304 | read_hg_path(on_disk, self.full_path) | |
299 | } |
|
305 | } | |
300 |
|
306 | |||
301 | pub(super) fn base_name_start<'on_disk>( |
|
307 | pub(super) fn base_name_start<'on_disk>( | |
302 | &self, |
|
308 | &self, | |
303 | ) -> Result<usize, DirstateV2ParseError> { |
|
309 | ) -> Result<usize, DirstateV2ParseError> { | |
304 | let start = self.base_name_start.get(); |
|
310 | let start = self.base_name_start.get(); | |
305 | if start < self.full_path.len.get() { |
|
311 | if start < self.full_path.len.get() { | |
306 | let start = usize::try_from(start) |
|
312 | let start = usize::try_from(start) | |
307 | // u32 -> usize, could only panic on a 16-bit CPU |
|
313 | // u32 -> usize, could only panic on a 16-bit CPU | |
308 | .expect("dirstate-v2 base_name_start out of bounds"); |
|
314 | .expect("dirstate-v2 base_name_start out of bounds"); | |
309 | Ok(start) |
|
315 | Ok(start) | |
310 | } else { |
|
316 | } else { | |
311 | Err(DirstateV2ParseError) |
|
317 | Err(DirstateV2ParseError) | |
312 | } |
|
318 | } | |
313 | } |
|
319 | } | |
314 |
|
320 | |||
315 | pub(super) fn base_name<'on_disk>( |
|
321 | pub(super) fn base_name<'on_disk>( | |
316 | &self, |
|
322 | &self, | |
317 | on_disk: &'on_disk [u8], |
|
323 | on_disk: &'on_disk [u8], | |
318 | ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { |
|
324 | ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { | |
319 | let full_path = self.full_path(on_disk)?; |
|
325 | let full_path = self.full_path(on_disk)?; | |
320 | let base_name_start = self.base_name_start()?; |
|
326 | let base_name_start = self.base_name_start()?; | |
321 | Ok(HgPath::new(&full_path.as_bytes()[base_name_start..])) |
|
327 | Ok(HgPath::new(&full_path.as_bytes()[base_name_start..])) | |
322 | } |
|
328 | } | |
323 |
|
329 | |||
324 | pub(super) fn path<'on_disk>( |
|
330 | pub(super) fn path<'on_disk>( | |
325 | &self, |
|
331 | &self, | |
326 | on_disk: &'on_disk [u8], |
|
332 | on_disk: &'on_disk [u8], | |
327 | ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> { |
|
333 | ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> { | |
328 | Ok(WithBasename::from_raw_parts( |
|
334 | Ok(WithBasename::from_raw_parts( | |
329 | Cow::Borrowed(self.full_path(on_disk)?), |
|
335 | Cow::Borrowed(self.full_path(on_disk)?), | |
330 | self.base_name_start()?, |
|
336 | self.base_name_start()?, | |
331 | )) |
|
337 | )) | |
332 | } |
|
338 | } | |
333 |
|
339 | |||
334 | pub(super) fn has_copy_source<'on_disk>(&self) -> bool { |
|
340 | pub(super) fn has_copy_source<'on_disk>(&self) -> bool { | |
335 | self.copy_source.start.get() != 0 |
|
341 | self.copy_source.start.get() != 0 | |
336 | } |
|
342 | } | |
337 |
|
343 | |||
338 | pub(super) fn copy_source<'on_disk>( |
|
344 | pub(super) fn copy_source<'on_disk>( | |
339 | &self, |
|
345 | &self, | |
340 | on_disk: &'on_disk [u8], |
|
346 | on_disk: &'on_disk [u8], | |
341 | ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> { |
|
347 | ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> { | |
342 | Ok(if self.has_copy_source() { |
|
348 | Ok(if self.has_copy_source() { | |
343 | Some(read_hg_path(on_disk, self.copy_source)?) |
|
349 | Some(read_hg_path(on_disk, self.copy_source)?) | |
344 | } else { |
|
350 | } else { | |
345 | None |
|
351 | None | |
346 | }) |
|
352 | }) | |
347 | } |
|
353 | } | |
348 |
|
354 | |||
349 | pub(super) fn node_data( |
|
355 | pub(super) fn node_data( | |
350 | &self, |
|
356 | &self, | |
351 | ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> { |
|
357 | ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> { | |
352 | let entry = |state| { |
|
358 | let entry = |state| { | |
353 | dirstate_map::NodeData::Entry(self.entry_with_given_state(state)) |
|
359 | dirstate_map::NodeData::Entry(self.entry_with_given_state(state)) | |
354 | }; |
|
360 | }; | |
355 |
|
361 | |||
356 | match self.state { |
|
362 | match self.state { | |
357 | b'\0' => Ok(dirstate_map::NodeData::None), |
|
363 | b'\0' => Ok(dirstate_map::NodeData::None), | |
358 | b'd' => Ok(dirstate_map::NodeData::CachedDirectory { |
|
364 | b'd' => Ok(dirstate_map::NodeData::CachedDirectory { | |
359 | mtime: *self.data.as_timestamp(), |
|
365 | mtime: *self.data.as_timestamp(), | |
360 | }), |
|
366 | }), | |
361 | b'n' => Ok(entry(EntryState::Normal)), |
|
367 | b'n' => Ok(entry(EntryState::Normal)), | |
362 | b'a' => Ok(entry(EntryState::Added)), |
|
368 | b'a' => Ok(entry(EntryState::Added)), | |
363 | b'r' => Ok(entry(EntryState::Removed)), |
|
369 | b'r' => Ok(entry(EntryState::Removed)), | |
364 | b'm' => Ok(entry(EntryState::Merged)), |
|
370 | b'm' => Ok(entry(EntryState::Merged)), | |
365 | _ => Err(DirstateV2ParseError), |
|
371 | _ => Err(DirstateV2ParseError), | |
366 | } |
|
372 | } | |
367 | } |
|
373 | } | |
368 |
|
374 | |||
369 | pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> { |
|
375 | pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> { | |
370 | if self.state == b'd' { |
|
376 | if self.state == b'd' { | |
371 | Some(self.data.as_timestamp()) |
|
377 | Some(self.data.as_timestamp()) | |
372 | } else { |
|
378 | } else { | |
373 | None |
|
379 | None | |
374 | } |
|
380 | } | |
375 | } |
|
381 | } | |
376 |
|
382 | |||
377 | pub(super) fn state( |
|
383 | pub(super) fn state( | |
378 | &self, |
|
384 | &self, | |
379 | ) -> Result<Option<EntryState>, DirstateV2ParseError> { |
|
385 | ) -> Result<Option<EntryState>, DirstateV2ParseError> { | |
380 | match self.state { |
|
386 | match self.state { | |
381 | b'\0' | b'd' => Ok(None), |
|
387 | b'\0' | b'd' => Ok(None), | |
382 | b'n' => Ok(Some(EntryState::Normal)), |
|
388 | b'n' => Ok(Some(EntryState::Normal)), | |
383 | b'a' => Ok(Some(EntryState::Added)), |
|
389 | b'a' => Ok(Some(EntryState::Added)), | |
384 | b'r' => Ok(Some(EntryState::Removed)), |
|
390 | b'r' => Ok(Some(EntryState::Removed)), | |
385 | b'm' => Ok(Some(EntryState::Merged)), |
|
391 | b'm' => Ok(Some(EntryState::Merged)), | |
386 | _ => Err(DirstateV2ParseError), |
|
392 | _ => Err(DirstateV2ParseError), | |
387 | } |
|
393 | } | |
388 | } |
|
394 | } | |
389 |
|
395 | |||
390 | fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry { |
|
396 | fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry { | |
391 | DirstateEntry { |
|
397 | DirstateEntry { | |
392 | state, |
|
398 | state, | |
393 | mode: self.data.mode.get(), |
|
399 | mode: self.data.mode.get(), | |
394 | mtime: self.data.mtime.get(), |
|
400 | mtime: self.data.mtime.get(), | |
395 | size: self.data.size.get(), |
|
401 | size: self.data.size.get(), | |
396 | } |
|
402 | } | |
397 | } |
|
403 | } | |
398 |
|
404 | |||
399 | pub(super) fn entry( |
|
405 | pub(super) fn entry( | |
400 | &self, |
|
406 | &self, | |
401 | ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { |
|
407 | ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { | |
402 | Ok(self |
|
408 | Ok(self | |
403 | .state()? |
|
409 | .state()? | |
404 | .map(|state| self.entry_with_given_state(state))) |
|
410 | .map(|state| self.entry_with_given_state(state))) | |
405 | } |
|
411 | } | |
406 |
|
412 | |||
407 | pub(super) fn children<'on_disk>( |
|
413 | pub(super) fn children<'on_disk>( | |
408 | &self, |
|
414 | &self, | |
409 | on_disk: &'on_disk [u8], |
|
415 | on_disk: &'on_disk [u8], | |
410 | ) -> Result<&'on_disk [Node], DirstateV2ParseError> { |
|
416 | ) -> Result<&'on_disk [Node], DirstateV2ParseError> { | |
411 |
read_ |
|
417 | read_nodes(on_disk, self.children) | |
412 | } |
|
418 | } | |
413 |
|
419 | |||
414 | pub(super) fn to_in_memory_node<'on_disk>( |
|
420 | pub(super) fn to_in_memory_node<'on_disk>( | |
415 | &self, |
|
421 | &self, | |
416 | on_disk: &'on_disk [u8], |
|
422 | on_disk: &'on_disk [u8], | |
417 | ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> { |
|
423 | ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> { | |
418 | Ok(dirstate_map::Node { |
|
424 | Ok(dirstate_map::Node { | |
419 | children: dirstate_map::ChildNodes::OnDisk( |
|
425 | children: dirstate_map::ChildNodes::OnDisk( | |
420 | self.children(on_disk)?, |
|
426 | self.children(on_disk)?, | |
421 | ), |
|
427 | ), | |
422 | copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), |
|
428 | copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), | |
423 | data: self.node_data()?, |
|
429 | data: self.node_data()?, | |
424 | descendants_with_entry_count: self |
|
430 | descendants_with_entry_count: self | |
425 | .descendants_with_entry_count |
|
431 | .descendants_with_entry_count | |
426 | .get(), |
|
432 | .get(), | |
427 | tracked_descendants_count: self.tracked_descendants_count.get(), |
|
433 | tracked_descendants_count: self.tracked_descendants_count.get(), | |
428 | }) |
|
434 | }) | |
429 | } |
|
435 | } | |
430 | } |
|
436 | } | |
431 |
|
437 | |||
432 | impl Entry { |
|
438 | impl Entry { | |
433 | fn from_timestamp(timestamp: Timestamp) -> Self { |
|
439 | fn from_timestamp(timestamp: Timestamp) -> Self { | |
434 | // Safety: both types implement the `ByteCast` trait, so we could |
|
440 | // Safety: both types implement the `ByteCast` trait, so we could | |
435 | // safely use `as_bytes` and `from_bytes` to do this conversion. Using |
|
441 | // safely use `as_bytes` and `from_bytes` to do this conversion. Using | |
436 | // `transmute` instead makes the compiler check that the two types |
|
442 | // `transmute` instead makes the compiler check that the two types | |
437 | // have the same size, which eliminates the error case of |
|
443 | // have the same size, which eliminates the error case of | |
438 | // `from_bytes`. |
|
444 | // `from_bytes`. | |
439 | unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) } |
|
445 | unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) } | |
440 | } |
|
446 | } | |
441 |
|
447 | |||
442 | fn as_timestamp(&self) -> &Timestamp { |
|
448 | fn as_timestamp(&self) -> &Timestamp { | |
443 | // Safety: same as above in `from_timestamp` |
|
449 | // Safety: same as above in `from_timestamp` | |
444 | unsafe { &*(self as *const Entry as *const Timestamp) } |
|
450 | unsafe { &*(self as *const Entry as *const Timestamp) } | |
445 | } |
|
451 | } | |
446 | } |
|
452 | } | |
447 |
|
453 | |||
448 | impl Timestamp { |
|
454 | impl Timestamp { | |
449 | pub fn seconds(&self) -> i64 { |
|
455 | pub fn seconds(&self) -> i64 { | |
450 | self.seconds.get() |
|
456 | self.seconds.get() | |
451 | } |
|
457 | } | |
452 | } |
|
458 | } | |
453 |
|
459 | |||
454 | impl From<SystemTime> for Timestamp { |
|
460 | impl From<SystemTime> for Timestamp { | |
455 | fn from(system_time: SystemTime) -> Self { |
|
461 | fn from(system_time: SystemTime) -> Self { | |
456 | let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) { |
|
462 | let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) { | |
457 | Ok(duration) => { |
|
463 | Ok(duration) => { | |
458 | (duration.as_secs() as i64, duration.subsec_nanos()) |
|
464 | (duration.as_secs() as i64, duration.subsec_nanos()) | |
459 | } |
|
465 | } | |
460 | Err(error) => { |
|
466 | Err(error) => { | |
461 | let negative = error.duration(); |
|
467 | let negative = error.duration(); | |
462 | (-(negative.as_secs() as i64), negative.subsec_nanos()) |
|
468 | (-(negative.as_secs() as i64), negative.subsec_nanos()) | |
463 | } |
|
469 | } | |
464 | }; |
|
470 | }; | |
465 | Timestamp { |
|
471 | Timestamp { | |
466 | seconds: secs.into(), |
|
472 | seconds: secs.into(), | |
467 | nanoseconds: nanos.into(), |
|
473 | nanoseconds: nanos.into(), | |
468 | } |
|
474 | } | |
469 | } |
|
475 | } | |
470 | } |
|
476 | } | |
471 |
|
477 | |||
472 | impl From<&'_ Timestamp> for SystemTime { |
|
478 | impl From<&'_ Timestamp> for SystemTime { | |
473 | fn from(timestamp: &'_ Timestamp) -> Self { |
|
479 | fn from(timestamp: &'_ Timestamp) -> Self { | |
474 | let secs = timestamp.seconds.get(); |
|
480 | let secs = timestamp.seconds.get(); | |
475 | let nanos = timestamp.nanoseconds.get(); |
|
481 | let nanos = timestamp.nanoseconds.get(); | |
476 | if secs >= 0 { |
|
482 | if secs >= 0 { | |
477 | UNIX_EPOCH + Duration::new(secs as u64, nanos) |
|
483 | UNIX_EPOCH + Duration::new(secs as u64, nanos) | |
478 | } else { |
|
484 | } else { | |
479 | UNIX_EPOCH - Duration::new((-secs) as u64, nanos) |
|
485 | UNIX_EPOCH - Duration::new((-secs) as u64, nanos) | |
480 | } |
|
486 | } | |
481 | } |
|
487 | } | |
482 | } |
|
488 | } | |
483 |
|
489 | |||
484 | fn read_hg_path( |
|
490 | fn read_hg_path( | |
485 | on_disk: &[u8], |
|
491 | on_disk: &[u8], | |
486 | slice: Slice, |
|
492 | slice: PathSlice, | |
487 | ) -> Result<&HgPath, DirstateV2ParseError> { |
|
493 | ) -> Result<&HgPath, DirstateV2ParseError> { | |
488 | let bytes = read_slice::<u8>(on_disk, slice)?; |
|
494 | read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) | |
489 | Ok(HgPath::new(bytes)) |
|
|||
490 | } |
|
495 | } | |
491 |
|
496 | |||
492 |
fn read_s |
|
497 | fn read_nodes( | |
493 | on_disk: &[u8], |
|
498 | on_disk: &[u8], | |
494 |
slice: |
|
499 | slice: ChildNodes, | |
|
500 | ) -> Result<&[Node], DirstateV2ParseError> { | |||
|
501 | read_slice(on_disk, slice.start, slice.len.get()) | |||
|
502 | } | |||
|
503 | ||||
|
504 | fn read_slice<T, Len>( | |||
|
505 | on_disk: &[u8], | |||
|
506 | start: Offset, | |||
|
507 | len: Len, | |||
495 | ) -> Result<&[T], DirstateV2ParseError> |
|
508 | ) -> Result<&[T], DirstateV2ParseError> | |
496 | where |
|
509 | where | |
497 | T: BytesCast, |
|
510 | T: BytesCast, | |
|
511 | Len: TryInto<usize>, | |||
498 | { |
|
512 | { | |
499 | // Either `usize::MAX` would result in "out of bounds" error since a single |
|
513 | // Either `usize::MAX` would result in "out of bounds" error since a single | |
500 | // `&[u8]` cannot occupy the entire addess space. |
|
514 | // `&[u8]` cannot occupy the entire addess space. | |
501 |
let start = |
|
515 | let start = start.get().try_into().unwrap_or(std::usize::MAX); | |
502 |
let len = |
|
516 | let len = len.try_into().unwrap_or(std::usize::MAX); | |
503 | on_disk |
|
517 | on_disk | |
504 | .get(start..) |
|
518 | .get(start..) | |
505 | .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) |
|
519 | .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) | |
506 | .map(|(slice, _rest)| slice) |
|
520 | .map(|(slice, _rest)| slice) | |
507 | .ok_or_else(|| DirstateV2ParseError) |
|
521 | .ok_or_else(|| DirstateV2ParseError) | |
508 | } |
|
522 | } | |
509 |
|
523 | |||
510 | pub(crate) fn for_each_tracked_path<'on_disk>( |
|
524 | pub(crate) fn for_each_tracked_path<'on_disk>( | |
511 | on_disk: &'on_disk [u8], |
|
525 | on_disk: &'on_disk [u8], | |
512 | mut f: impl FnMut(&'on_disk HgPath), |
|
526 | mut f: impl FnMut(&'on_disk HgPath), | |
513 | ) -> Result<(), DirstateV2ParseError> { |
|
527 | ) -> Result<(), DirstateV2ParseError> { | |
514 | let root = read_root(on_disk)?; |
|
528 | let root = read_root(on_disk)?; | |
515 | fn recur<'on_disk>( |
|
529 | fn recur<'on_disk>( | |
516 | on_disk: &'on_disk [u8], |
|
530 | on_disk: &'on_disk [u8], | |
517 |
nodes: |
|
531 | nodes: ChildNodes, | |
518 | f: &mut impl FnMut(&'on_disk HgPath), |
|
532 | f: &mut impl FnMut(&'on_disk HgPath), | |
519 | ) -> Result<(), DirstateV2ParseError> { |
|
533 | ) -> Result<(), DirstateV2ParseError> { | |
520 |
for node in read_ |
|
534 | for node in read_nodes(on_disk, nodes)? { | |
521 | if let Some(state) = node.state()? { |
|
535 | if let Some(state) = node.state()? { | |
522 | if state.is_tracked() { |
|
536 | if state.is_tracked() { | |
523 | f(node.full_path(on_disk)?) |
|
537 | f(node.full_path(on_disk)?) | |
524 | } |
|
538 | } | |
525 | } |
|
539 | } | |
526 | recur(on_disk, node.children, f)? |
|
540 | recur(on_disk, node.children, f)? | |
527 | } |
|
541 | } | |
528 | Ok(()) |
|
542 | Ok(()) | |
529 | } |
|
543 | } | |
530 | recur(on_disk, root.root_nodes, &mut f) |
|
544 | recur(on_disk, root.root_nodes, &mut f) | |
531 | } |
|
545 | } | |
532 |
|
546 | |||
533 | pub(super) fn write( |
|
547 | pub(super) fn write( | |
534 | dirstate_map: &mut DirstateMap, |
|
548 | dirstate_map: &mut DirstateMap, | |
535 | ) -> Result<Vec<u8>, DirstateError> { |
|
549 | ) -> Result<Vec<u8>, DirstateError> { | |
536 | let root_len = std::mem::size_of::<Root>(); |
|
550 | let root_len = std::mem::size_of::<Root>(); | |
537 |
|
551 | |||
538 | // This ignores the space for paths, and for nodes without an entry. |
|
552 | // This ignores the space for paths, and for nodes without an entry. | |
539 | // TODO: better estimate? Skip the `Vec` and write to a file directly? |
|
553 | // TODO: better estimate? Skip the `Vec` and write to a file directly? | |
540 | let size_guess = root_len |
|
554 | let size_guess = root_len | |
541 | + std::mem::size_of::<Node>() |
|
555 | + std::mem::size_of::<Node>() | |
542 | * dirstate_map.nodes_with_entry_count as usize; |
|
556 | * dirstate_map.nodes_with_entry_count as usize; | |
543 | let mut out = Vec::with_capacity(size_guess); |
|
557 | let mut out = Vec::with_capacity(size_guess); | |
544 |
|
558 | |||
545 | let root_nodes = |
|
559 | let root_nodes = | |
546 | write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?; |
|
560 | write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?; | |
547 |
|
561 | |||
548 | let root = Root { |
|
562 | let root = Root { | |
549 | root_nodes, |
|
563 | root_nodes, | |
550 | nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), |
|
564 | nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), | |
551 | nodes_with_copy_source_count: dirstate_map |
|
565 | nodes_with_copy_source_count: dirstate_map | |
552 | .nodes_with_copy_source_count |
|
566 | .nodes_with_copy_source_count | |
553 | .into(), |
|
567 | .into(), | |
554 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, |
|
568 | ignore_patterns_hash: dirstate_map.ignore_patterns_hash, | |
555 | }; |
|
569 | }; | |
556 | out.extend(root.as_bytes()); |
|
570 | out.extend(root.as_bytes()); | |
557 | Ok(out) |
|
571 | Ok(out) | |
558 | } |
|
572 | } | |
559 |
|
573 | |||
560 | fn write_nodes( |
|
574 | fn write_nodes( | |
561 | dirstate_map: &DirstateMap, |
|
575 | dirstate_map: &DirstateMap, | |
562 | nodes: dirstate_map::ChildNodesRef, |
|
576 | nodes: dirstate_map::ChildNodesRef, | |
563 | out: &mut Vec<u8>, |
|
577 | out: &mut Vec<u8>, | |
564 | ) -> Result<ChildNodes, DirstateError> { |
|
578 | ) -> Result<ChildNodes, DirstateError> { | |
565 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration |
|
579 | // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration | |
566 | // order. Sort to enable binary search in the written file. |
|
580 | // order. Sort to enable binary search in the written file. | |
567 | let nodes = nodes.sorted(); |
|
581 | let nodes = nodes.sorted(); | |
|
582 | let nodes_len = nodes.len(); | |||
568 |
|
583 | |||
569 | // First accumulate serialized nodes in a `Vec` |
|
584 | // First accumulate serialized nodes in a `Vec` | |
570 |
let mut on_disk_nodes = Vec::with_capacity(nodes |
|
585 | let mut on_disk_nodes = Vec::with_capacity(nodes_len); | |
571 | for node in nodes { |
|
586 | for node in nodes { | |
572 | let children = write_nodes( |
|
587 | let children = write_nodes( | |
573 | dirstate_map, |
|
588 | dirstate_map, | |
574 | node.children(dirstate_map.on_disk)?, |
|
589 | node.children(dirstate_map.on_disk)?, | |
575 | out, |
|
590 | out, | |
576 | )?; |
|
591 | )?; | |
577 | let full_path = node.full_path(dirstate_map.on_disk)?; |
|
592 | let full_path = node.full_path(dirstate_map.on_disk)?; | |
578 |
let full_path = write_ |
|
593 | let full_path = write_path(full_path.as_bytes(), out); | |
579 | let copy_source = |
|
594 | let copy_source = | |
580 | if let Some(source) = node.copy_source(dirstate_map.on_disk)? { |
|
595 | if let Some(source) = node.copy_source(dirstate_map.on_disk)? { | |
581 |
write_ |
|
596 | write_path(source.as_bytes(), out) | |
582 | } else { |
|
597 | } else { | |
583 | Slice { |
|
598 | PathSlice { | |
584 | start: 0.into(), |
|
599 | start: 0.into(), | |
585 | len: 0.into(), |
|
600 | len: 0.into(), | |
586 | } |
|
601 | } | |
587 | }; |
|
602 | }; | |
588 | on_disk_nodes.push(match node { |
|
603 | on_disk_nodes.push(match node { | |
589 | NodeRef::InMemory(path, node) => { |
|
604 | NodeRef::InMemory(path, node) => { | |
590 | let (state, data) = match &node.data { |
|
605 | let (state, data) = match &node.data { | |
591 | dirstate_map::NodeData::Entry(entry) => ( |
|
606 | dirstate_map::NodeData::Entry(entry) => ( | |
592 | entry.state.into(), |
|
607 | entry.state.into(), | |
593 | Entry { |
|
608 | Entry { | |
594 | mode: entry.mode.into(), |
|
609 | mode: entry.mode.into(), | |
595 | mtime: entry.mtime.into(), |
|
610 | mtime: entry.mtime.into(), | |
596 | size: entry.size.into(), |
|
611 | size: entry.size.into(), | |
597 | }, |
|
612 | }, | |
598 | ), |
|
613 | ), | |
599 | dirstate_map::NodeData::CachedDirectory { mtime } => { |
|
614 | dirstate_map::NodeData::CachedDirectory { mtime } => { | |
600 | (b'd', Entry::from_timestamp(*mtime)) |
|
615 | (b'd', Entry::from_timestamp(*mtime)) | |
601 | } |
|
616 | } | |
602 | dirstate_map::NodeData::None => ( |
|
617 | dirstate_map::NodeData::None => ( | |
603 | b'\0', |
|
618 | b'\0', | |
604 | Entry { |
|
619 | Entry { | |
605 | mode: 0.into(), |
|
620 | mode: 0.into(), | |
606 | mtime: 0.into(), |
|
621 | mtime: 0.into(), | |
607 | size: 0.into(), |
|
622 | size: 0.into(), | |
608 | }, |
|
623 | }, | |
609 | ), |
|
624 | ), | |
610 | }; |
|
625 | }; | |
611 | Node { |
|
626 | Node { | |
612 | children, |
|
627 | children, | |
613 | copy_source, |
|
628 | copy_source, | |
614 | full_path, |
|
629 | full_path, | |
615 |
base_name_start: u |
|
630 | base_name_start: u16::try_from(path.base_name_start()) | |
616 |
// Could only panic for paths over |
|
631 | // Could only panic for paths over 64 KiB | |
617 |
.expect("dirstate-v2 |
|
632 | .expect("dirstate-v2 path length overflow") | |
618 | .into(), |
|
633 | .into(), | |
619 | descendants_with_entry_count: node |
|
634 | descendants_with_entry_count: node | |
620 | .descendants_with_entry_count |
|
635 | .descendants_with_entry_count | |
621 | .into(), |
|
636 | .into(), | |
622 | tracked_descendants_count: node |
|
637 | tracked_descendants_count: node | |
623 | .tracked_descendants_count |
|
638 | .tracked_descendants_count | |
624 | .into(), |
|
639 | .into(), | |
625 | state, |
|
640 | state, | |
626 | data, |
|
641 | data, | |
627 | } |
|
642 | } | |
628 | } |
|
643 | } | |
629 | NodeRef::OnDisk(node) => Node { |
|
644 | NodeRef::OnDisk(node) => Node { | |
630 | children, |
|
645 | children, | |
631 | copy_source, |
|
646 | copy_source, | |
632 | full_path, |
|
647 | full_path, | |
633 | ..*node |
|
648 | ..*node | |
634 | }, |
|
649 | }, | |
635 | }) |
|
650 | }) | |
636 | } |
|
651 | } | |
637 | // β¦ so we can write them contiguously |
|
652 | // β¦ so we can write them contiguously, after writing everything else they | |
638 | Ok(write_slice::<Node>(&on_disk_nodes, out)) |
|
653 | // refer to. | |
|
654 | let start = current_offset(out); | |||
|
655 | let len = u32::try_from(nodes_len) | |||
|
656 | // Could only panic with over 4 billion nodes | |||
|
657 | .expect("dirstate-v2 path length overflow") | |||
|
658 | .into(); | |||
|
659 | out.extend(on_disk_nodes.as_bytes()); | |||
|
660 | Ok(ChildNodes { start, len }) | |||
639 | } |
|
661 | } | |
640 |
|
662 | |||
641 | fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice |
|
663 | fn current_offset(out: &Vec<u8>) -> Offset { | |
642 | where |
|
664 | u32::try_from(out.len()) | |
643 | T: BytesCast, |
|
|||
644 | { |
|
|||
645 | let start = u32::try_from(out.len()) |
|
|||
646 | // Could only panic for a dirstate file larger than 4 GiB |
|
665 | // Could only panic for a dirstate file larger than 4 GiB | |
647 | .expect("dirstate-v2 offset overflow") |
|
666 | .expect("dirstate-v2 offset overflow") | |
648 |
.into() |
|
667 | .into() | |
649 | let len = u32::try_from(slice.len()) |
|
668 | } | |
650 | // Could only panic for paths over 4 GiB or nodes with over 4 billions |
|
669 | ||
651 | // child nodes |
|
670 | fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice { | |
652 | .expect("dirstate-v2 offset overflow") |
|
671 | let start = current_offset(out); | |
|
672 | let len = u16::try_from(slice.len()) | |||
|
673 | // Could only panic for paths over 64 KiB | |||
|
674 | .expect("dirstate-v2 path length overflow") | |||
653 | .into(); |
|
675 | .into(); | |
654 | out.extend(slice.as_bytes()); |
|
676 | out.extend(slice.as_bytes()); | |
655 | Slice { start, len } |
|
677 | PathSlice { start, len } | |
656 | } |
|
678 | } |
General Comments 0
You need to be logged in to leave comments.
Login now