##// END OF EJS Templates
dirstate-v2: shrink on-disk path lengths to 16-bits...
Simon Sapin -
r48477:da1c0cd6 default
parent child Browse files
Show More
@@ -1,656 +1,678
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! # File format
3 //! # File format
4 //!
4 //!
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" that starts
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
7 //! struct, followed by the data file identifier.
7 //! struct, followed by the data file identifier.
8 //!
8 //!
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
10 //! file may be longer than the size given in the docket, but not shorter. Only
10 //! file may be longer than the size given in the docket, but not shorter. Only
11 //! the start of the data file up to the given size is considered. The
11 //! the start of the data file up to the given size is considered. The
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
13 //! `Root` struct is found at the end of that slice of data.
13 //! `Root` struct is found at the end of that slice of data.
14 //!
14 //!
15 //! Its `root_nodes` field contains the slice (offset and length) to
15 //! Its `root_nodes` field contains the slice (offset and length) to
16 //! the nodes representing the files and directories at the root of the
16 //! the nodes representing the files and directories at the root of the
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
18 //! Nodes in turn contain slices to variable-size paths, and to their own child
19 //! nodes (if any) for nested files and directories.
19 //! nodes (if any) for nested files and directories.
20
20
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
22 use crate::dirstate_tree::path_with_basename::WithBasename;
22 use crate::dirstate_tree::path_with_basename::WithBasename;
23 use crate::errors::HgError;
23 use crate::errors::HgError;
24 use crate::utils::hg_path::HgPath;
24 use crate::utils::hg_path::HgPath;
25 use crate::DirstateEntry;
25 use crate::DirstateEntry;
26 use crate::DirstateError;
26 use crate::DirstateError;
27 use crate::DirstateParents;
27 use crate::DirstateParents;
28 use crate::EntryState;
28 use crate::EntryState;
29 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 use bytes_cast::BytesCast;
30 use bytes_cast::BytesCast;
31 use format_bytes::format_bytes;
31 use format_bytes::format_bytes;
32 use std::borrow::Cow;
32 use std::borrow::Cow;
33 use std::convert::{TryFrom, TryInto};
33 use std::convert::{TryFrom, TryInto};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
35
35
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
37 /// This is a redundant sanity check more than an actual "magic number" since
37 /// This is a redundant sanity check more than an actual "magic number" since
38 /// `.hg/requires` already governs which format should be used.
38 /// `.hg/requires` already governs which format should be used.
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
40
40
41 /// Keep space for 256-bit hashes
41 /// Keep space for 256-bit hashes
42 const STORED_NODE_ID_BYTES: usize = 32;
42 const STORED_NODE_ID_BYTES: usize = 32;
43
43
44 /// … even though only 160 bits are used for now, with SHA-1
44 /// … even though only 160 bits are used for now, with SHA-1
45 const USED_NODE_ID_BYTES: usize = 20;
45 const USED_NODE_ID_BYTES: usize = 20;
46
46
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49
49
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 #[derive(BytesCast)]
51 #[derive(BytesCast)]
52 #[repr(C)]
52 #[repr(C)]
53 struct DocketHeader {
53 struct DocketHeader {
54 marker: [u8; V2_FORMAT_MARKER.len()],
54 marker: [u8; V2_FORMAT_MARKER.len()],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
56 parent_2: [u8; STORED_NODE_ID_BYTES],
57
58 /// Counted in bytes
57 data_size: Size,
59 data_size: Size,
60
58 uuid_size: u8,
61 uuid_size: u8,
59 }
62 }
60
63
61 pub struct Docket<'on_disk> {
64 pub struct Docket<'on_disk> {
62 header: &'on_disk DocketHeader,
65 header: &'on_disk DocketHeader,
63 uuid: &'on_disk [u8],
66 uuid: &'on_disk [u8],
64 }
67 }
65
68
66 #[derive(BytesCast)]
69 #[derive(BytesCast)]
67 #[repr(C)]
70 #[repr(C)]
68 struct Root {
71 struct Root {
69 root_nodes: ChildNodes,
72 root_nodes: ChildNodes,
70 nodes_with_entry_count: Size,
73 nodes_with_entry_count: Size,
71 nodes_with_copy_source_count: Size,
74 nodes_with_copy_source_count: Size,
72
75
73 /// If non-zero, a hash of ignore files that were used for some previous
76 /// If non-zero, a hash of ignore files that were used for some previous
74 /// run of the `status` algorithm.
77 /// run of the `status` algorithm.
75 ///
78 ///
76 /// We define:
79 /// We define:
77 ///
80 ///
78 /// * "Root" ignore files are `.hgignore` at the root of the repository if
81 /// * "Root" ignore files are `.hgignore` at the root of the repository if
79 /// it exists, and files from `ui.ignore.*` config. This set of files is
82 /// it exists, and files from `ui.ignore.*` config. This set of files is
80 /// then sorted by the string representation of their path.
83 /// then sorted by the string representation of their path.
81 /// * The "expanded contents" of an ignore files is the byte string made
84 /// * The "expanded contents" of an ignore files is the byte string made
82 /// by concatenating its contents with the "expanded contents" of other
85 /// by concatenating its contents with the "expanded contents" of other
83 /// files included with `include:` or `subinclude:` files, in inclusion
86 /// files included with `include:` or `subinclude:` files, in inclusion
84 /// order. This definition is recursive, as included files can
87 /// order. This definition is recursive, as included files can
85 /// themselves include more files.
88 /// themselves include more files.
86 ///
89 ///
87 /// This hash is defined as the SHA-1 of the concatenation (in sorted
90 /// This hash is defined as the SHA-1 of the concatenation (in sorted
88 /// order) of the "expanded contents" of each "root" ignore file.
91 /// order) of the "expanded contents" of each "root" ignore file.
89 /// (Note that computing this does not require actually concatenating byte
92 /// (Note that computing this does not require actually concatenating byte
90 /// strings into contiguous memory, instead SHA-1 hashing can be done
93 /// strings into contiguous memory, instead SHA-1 hashing can be done
91 /// incrementally.)
94 /// incrementally.)
92 ignore_patterns_hash: IgnorePatternsHash,
95 ignore_patterns_hash: IgnorePatternsHash,
93 }
96 }
94
97
95 #[derive(BytesCast)]
98 #[derive(BytesCast)]
96 #[repr(C)]
99 #[repr(C)]
97 pub(super) struct Node {
100 pub(super) struct Node {
98 full_path: PathSlice,
101 full_path: PathSlice,
99
102
100 /// In bytes from `self.full_path.start`
103 /// In bytes from `self.full_path.start`
101 base_name_start: Size,
104 base_name_start: PathSize,
102
105
103 copy_source: OptPathSlice,
106 copy_source: OptPathSlice,
104 children: ChildNodes,
107 children: ChildNodes,
105 pub(super) descendants_with_entry_count: Size,
108 pub(super) descendants_with_entry_count: Size,
106 pub(super) tracked_descendants_count: Size,
109 pub(super) tracked_descendants_count: Size,
107
110
108 /// Depending on the value of `state`:
111 /// Depending on the value of `state`:
109 ///
112 ///
110 /// * A null byte: `data` is not used.
113 /// * A null byte: `data` is not used.
111 ///
114 ///
112 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
115 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
113 /// represent a dirstate entry like in the v1 format.
116 /// represent a dirstate entry like in the v1 format.
114 ///
117 ///
115 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
118 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
116 /// as the `Timestamp` for the mtime of a cached directory.
119 /// as the `Timestamp` for the mtime of a cached directory.
117 ///
120 ///
118 /// The presence of this state means that at some point, this path in
121 /// The presence of this state means that at some point, this path in
119 /// the working directory was observed:
122 /// the working directory was observed:
120 ///
123 ///
121 /// - To be a directory
124 /// - To be a directory
122 /// - With the modification time as given by `Timestamp`
125 /// - With the modification time as given by `Timestamp`
123 /// - That timestamp was already strictly in the past when observed,
126 /// - That timestamp was already strictly in the past when observed,
124 /// meaning that later changes cannot happen in the same clock tick
127 /// meaning that later changes cannot happen in the same clock tick
125 /// and must cause a different modification time (unless the system
128 /// and must cause a different modification time (unless the system
126 /// clock jumps back and we get unlucky, which is not impossible but
129 /// clock jumps back and we get unlucky, which is not impossible but
127 /// deemed unlikely enough).
130 /// deemed unlikely enough).
128 /// - All direct children of this directory (as returned by
131 /// - All direct children of this directory (as returned by
129 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
132 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
130 /// are ignored by ignore patterns whose hash is in
133 /// are ignored by ignore patterns whose hash is in
131 /// `Root::ignore_patterns_hash`.
134 /// `Root::ignore_patterns_hash`.
132 ///
135 ///
133 /// This means that if `std::fs::symlink_metadata` later reports the
136 /// This means that if `std::fs::symlink_metadata` later reports the
134 /// same modification time and ignored patterns haven’t changed, a run
137 /// same modification time and ignored patterns haven’t changed, a run
135 /// of status that is not listing ignored files can skip calling
138 /// of status that is not listing ignored files can skip calling
136 /// `std::fs::read_dir` again for this directory, and iterate child
139 /// `std::fs::read_dir` again for this directory, and iterate child
137 /// dirstate nodes instead.
140 /// dirstate nodes instead.
138 state: u8,
141 state: u8,
139 data: Entry,
142 data: Entry,
140 }
143 }
141
144
142 #[derive(BytesCast, Copy, Clone)]
145 #[derive(BytesCast, Copy, Clone)]
143 #[repr(C)]
146 #[repr(C)]
144 struct Entry {
147 struct Entry {
145 mode: I32Be,
148 mode: I32Be,
146 mtime: I32Be,
149 mtime: I32Be,
147 size: I32Be,
150 size: I32Be,
148 }
151 }
149
152
150 /// Duration since the Unix epoch
153 /// Duration since the Unix epoch
151 #[derive(BytesCast, Copy, Clone, PartialEq)]
154 #[derive(BytesCast, Copy, Clone, PartialEq)]
152 #[repr(C)]
155 #[repr(C)]
153 pub(super) struct Timestamp {
156 pub(super) struct Timestamp {
154 seconds: I64Be,
157 seconds: I64Be,
155
158
156 /// In `0 .. 1_000_000_000`.
159 /// In `0 .. 1_000_000_000`.
157 ///
160 ///
158 /// This timestamp is later or earlier than `(seconds, 0)` by this many
161 /// This timestamp is later or earlier than `(seconds, 0)` by this many
159 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
162 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
160 nanoseconds: U32Be,
163 nanoseconds: U32Be,
161 }
164 }
162
165
163 /// Counted in bytes from the start of the file
166 /// Counted in bytes from the start of the file
164 ///
167 ///
165 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
168 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
166 type Offset = U32Be;
169 type Offset = U32Be;
167
170
168 /// Counted in number of items
171 /// Counted in number of items
169 ///
172 ///
170 /// NOTE: not supporting directories with more than 4 billion direct children,
173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
171 /// or filenames more than 4 GiB.
172 type Size = U32Be;
174 type Size = U32Be;
173
175
174 /// Location of consecutive, fixed-size items.
176 /// Counted in bytes
175 ///
177 ///
176 /// An item can be a single byte for paths, or a struct with
178 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
177 /// `derive(BytesCast)`.
179 type PathSize = U16Be;
178 #[derive(BytesCast, Copy, Clone)]
179 #[repr(C)]
180 struct Slice {
181 start: Offset,
182 len: Size,
183 }
184
180
185 /// A contiguous sequence of `len` times `Node`, representing the child nodes
181 /// A contiguous sequence of `len` times `Node`, representing the child nodes
186 /// of either some other node or of the repository root.
182 /// of either some other node or of the repository root.
187 ///
183 ///
188 /// Always sorted by ascending `full_path`, to allow binary search.
184 /// Always sorted by ascending `full_path`, to allow binary search.
189 /// Since nodes with the same parent nodes also have the same parent path,
185 /// Since nodes with the same parent nodes also have the same parent path,
190 /// only the `base_name`s need to be compared during binary search.
186 /// only the `base_name`s need to be compared during binary search.
191 type ChildNodes = Slice;
187 #[derive(BytesCast, Copy, Clone)]
188 #[repr(C)]
189 struct ChildNodes {
190 start: Offset,
191 len: Size,
192 }
192
193
193 /// A `HgPath` of `len` bytes
194 /// A `HgPath` of `len` bytes
194 type PathSlice = Slice;
195 #[derive(BytesCast, Copy, Clone)]
196 #[repr(C)]
197 struct PathSlice {
198 start: Offset,
199 len: PathSize,
200 }
195
201
196 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
197 type OptPathSlice = Slice;
203 type OptPathSlice = PathSlice;
198
204
199 /// Make sure that size-affecting changes are made knowingly
205 /// Make sure that size-affecting changes are made knowingly
200 fn _static_assert_size_of() {
206 fn _static_assert_size_of() {
201 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
202 let _ = std::mem::transmute::<Root, [u8; 36]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
203 let _ = std::mem::transmute::<Node, [u8; 49]>;
209 let _ = std::mem::transmute::<Node, [u8; 43]>;
204 }
210 }
205
211
206 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
207 ///
213 ///
208 /// This should only happen if Mercurial is buggy or a repository is corrupted.
214 /// This should only happen if Mercurial is buggy or a repository is corrupted.
209 #[derive(Debug)]
215 #[derive(Debug)]
210 pub struct DirstateV2ParseError;
216 pub struct DirstateV2ParseError;
211
217
212 impl From<DirstateV2ParseError> for HgError {
218 impl From<DirstateV2ParseError> for HgError {
213 fn from(_: DirstateV2ParseError) -> Self {
219 fn from(_: DirstateV2ParseError) -> Self {
214 HgError::corrupted("dirstate-v2 parse error")
220 HgError::corrupted("dirstate-v2 parse error")
215 }
221 }
216 }
222 }
217
223
218 impl From<DirstateV2ParseError> for crate::DirstateError {
224 impl From<DirstateV2ParseError> for crate::DirstateError {
219 fn from(error: DirstateV2ParseError) -> Self {
225 fn from(error: DirstateV2ParseError) -> Self {
220 HgError::from(error).into()
226 HgError::from(error).into()
221 }
227 }
222 }
228 }
223
229
224 impl<'on_disk> Docket<'on_disk> {
230 impl<'on_disk> Docket<'on_disk> {
225 pub fn parents(&self) -> DirstateParents {
231 pub fn parents(&self) -> DirstateParents {
226 use crate::Node;
232 use crate::Node;
227 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
233 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
228 .unwrap()
234 .unwrap()
229 .clone();
235 .clone();
230 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
236 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
231 .unwrap()
237 .unwrap()
232 .clone();
238 .clone();
233 DirstateParents { p1, p2 }
239 DirstateParents { p1, p2 }
234 }
240 }
235
241
236 pub fn data_size(&self) -> usize {
242 pub fn data_size(&self) -> usize {
237 // This `unwrap` could only panic on a 16-bit CPU
243 // This `unwrap` could only panic on a 16-bit CPU
238 self.header.data_size.get().try_into().unwrap()
244 self.header.data_size.get().try_into().unwrap()
239 }
245 }
240
246
241 pub fn data_filename(&self) -> String {
247 pub fn data_filename(&self) -> String {
242 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
248 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
243 }
249 }
244 }
250 }
245
251
246 pub fn read_docket(
252 pub fn read_docket(
247 on_disk: &[u8],
253 on_disk: &[u8],
248 ) -> Result<Docket<'_>, DirstateV2ParseError> {
254 ) -> Result<Docket<'_>, DirstateV2ParseError> {
249 let (header, uuid) =
255 let (header, uuid) =
250 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
256 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
251 let uuid_size = header.uuid_size as usize;
257 let uuid_size = header.uuid_size as usize;
252 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
258 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
253 Ok(Docket { header, uuid })
259 Ok(Docket { header, uuid })
254 } else {
260 } else {
255 Err(DirstateV2ParseError)
261 Err(DirstateV2ParseError)
256 }
262 }
257 }
263 }
258
264
259 fn read_root<'on_disk>(
265 fn read_root<'on_disk>(
260 on_disk: &'on_disk [u8],
266 on_disk: &'on_disk [u8],
261 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
267 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
262 // Find the `Root` at the end of the given slice
268 // Find the `Root` at the end of the given slice
263 let root_offset = on_disk
269 let root_offset = on_disk
264 .len()
270 .len()
265 .checked_sub(std::mem::size_of::<Root>())
271 .checked_sub(std::mem::size_of::<Root>())
266 // A non-empty slice too short is an error
272 // A non-empty slice too short is an error
267 .ok_or(DirstateV2ParseError)?;
273 .ok_or(DirstateV2ParseError)?;
268 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
274 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
269 .map_err(|_| DirstateV2ParseError)?;
275 .map_err(|_| DirstateV2ParseError)?;
270 Ok(root)
276 Ok(root)
271 }
277 }
272
278
273 pub(super) fn read<'on_disk>(
279 pub(super) fn read<'on_disk>(
274 on_disk: &'on_disk [u8],
280 on_disk: &'on_disk [u8],
275 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
281 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
276 if on_disk.is_empty() {
282 if on_disk.is_empty() {
277 return Ok(DirstateMap::empty(on_disk));
283 return Ok(DirstateMap::empty(on_disk));
278 }
284 }
279 let root = read_root(on_disk)?;
285 let root = read_root(on_disk)?;
280 let dirstate_map = DirstateMap {
286 let dirstate_map = DirstateMap {
281 on_disk,
287 on_disk,
282 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
283 on_disk,
289 on_disk,
284 root.root_nodes,
290 root.root_nodes,
285 )?),
291 )?),
286 nodes_with_entry_count: root.nodes_with_entry_count.get(),
292 nodes_with_entry_count: root.nodes_with_entry_count.get(),
287 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
293 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
288 ignore_patterns_hash: root.ignore_patterns_hash,
294 ignore_patterns_hash: root.ignore_patterns_hash,
289 };
295 };
290 Ok(dirstate_map)
296 Ok(dirstate_map)
291 }
297 }
292
298
293 impl Node {
299 impl Node {
294 pub(super) fn full_path<'on_disk>(
300 pub(super) fn full_path<'on_disk>(
295 &self,
301 &self,
296 on_disk: &'on_disk [u8],
302 on_disk: &'on_disk [u8],
297 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
303 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
298 read_hg_path(on_disk, self.full_path)
304 read_hg_path(on_disk, self.full_path)
299 }
305 }
300
306
301 pub(super) fn base_name_start<'on_disk>(
307 pub(super) fn base_name_start<'on_disk>(
302 &self,
308 &self,
303 ) -> Result<usize, DirstateV2ParseError> {
309 ) -> Result<usize, DirstateV2ParseError> {
304 let start = self.base_name_start.get();
310 let start = self.base_name_start.get();
305 if start < self.full_path.len.get() {
311 if start < self.full_path.len.get() {
306 let start = usize::try_from(start)
312 let start = usize::try_from(start)
307 // u32 -> usize, could only panic on a 16-bit CPU
313 // u32 -> usize, could only panic on a 16-bit CPU
308 .expect("dirstate-v2 base_name_start out of bounds");
314 .expect("dirstate-v2 base_name_start out of bounds");
309 Ok(start)
315 Ok(start)
310 } else {
316 } else {
311 Err(DirstateV2ParseError)
317 Err(DirstateV2ParseError)
312 }
318 }
313 }
319 }
314
320
315 pub(super) fn base_name<'on_disk>(
321 pub(super) fn base_name<'on_disk>(
316 &self,
322 &self,
317 on_disk: &'on_disk [u8],
323 on_disk: &'on_disk [u8],
318 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
324 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
319 let full_path = self.full_path(on_disk)?;
325 let full_path = self.full_path(on_disk)?;
320 let base_name_start = self.base_name_start()?;
326 let base_name_start = self.base_name_start()?;
321 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
327 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
322 }
328 }
323
329
324 pub(super) fn path<'on_disk>(
330 pub(super) fn path<'on_disk>(
325 &self,
331 &self,
326 on_disk: &'on_disk [u8],
332 on_disk: &'on_disk [u8],
327 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
333 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
328 Ok(WithBasename::from_raw_parts(
334 Ok(WithBasename::from_raw_parts(
329 Cow::Borrowed(self.full_path(on_disk)?),
335 Cow::Borrowed(self.full_path(on_disk)?),
330 self.base_name_start()?,
336 self.base_name_start()?,
331 ))
337 ))
332 }
338 }
333
339
334 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
340 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
335 self.copy_source.start.get() != 0
341 self.copy_source.start.get() != 0
336 }
342 }
337
343
338 pub(super) fn copy_source<'on_disk>(
344 pub(super) fn copy_source<'on_disk>(
339 &self,
345 &self,
340 on_disk: &'on_disk [u8],
346 on_disk: &'on_disk [u8],
341 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
347 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
342 Ok(if self.has_copy_source() {
348 Ok(if self.has_copy_source() {
343 Some(read_hg_path(on_disk, self.copy_source)?)
349 Some(read_hg_path(on_disk, self.copy_source)?)
344 } else {
350 } else {
345 None
351 None
346 })
352 })
347 }
353 }
348
354
349 pub(super) fn node_data(
355 pub(super) fn node_data(
350 &self,
356 &self,
351 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
357 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
352 let entry = |state| {
358 let entry = |state| {
353 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
359 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
354 };
360 };
355
361
356 match self.state {
362 match self.state {
357 b'\0' => Ok(dirstate_map::NodeData::None),
363 b'\0' => Ok(dirstate_map::NodeData::None),
358 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
364 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
359 mtime: *self.data.as_timestamp(),
365 mtime: *self.data.as_timestamp(),
360 }),
366 }),
361 b'n' => Ok(entry(EntryState::Normal)),
367 b'n' => Ok(entry(EntryState::Normal)),
362 b'a' => Ok(entry(EntryState::Added)),
368 b'a' => Ok(entry(EntryState::Added)),
363 b'r' => Ok(entry(EntryState::Removed)),
369 b'r' => Ok(entry(EntryState::Removed)),
364 b'm' => Ok(entry(EntryState::Merged)),
370 b'm' => Ok(entry(EntryState::Merged)),
365 _ => Err(DirstateV2ParseError),
371 _ => Err(DirstateV2ParseError),
366 }
372 }
367 }
373 }
368
374
369 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
375 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
370 if self.state == b'd' {
376 if self.state == b'd' {
371 Some(self.data.as_timestamp())
377 Some(self.data.as_timestamp())
372 } else {
378 } else {
373 None
379 None
374 }
380 }
375 }
381 }
376
382
377 pub(super) fn state(
383 pub(super) fn state(
378 &self,
384 &self,
379 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
385 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
380 match self.state {
386 match self.state {
381 b'\0' | b'd' => Ok(None),
387 b'\0' | b'd' => Ok(None),
382 b'n' => Ok(Some(EntryState::Normal)),
388 b'n' => Ok(Some(EntryState::Normal)),
383 b'a' => Ok(Some(EntryState::Added)),
389 b'a' => Ok(Some(EntryState::Added)),
384 b'r' => Ok(Some(EntryState::Removed)),
390 b'r' => Ok(Some(EntryState::Removed)),
385 b'm' => Ok(Some(EntryState::Merged)),
391 b'm' => Ok(Some(EntryState::Merged)),
386 _ => Err(DirstateV2ParseError),
392 _ => Err(DirstateV2ParseError),
387 }
393 }
388 }
394 }
389
395
390 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
396 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
391 DirstateEntry {
397 DirstateEntry {
392 state,
398 state,
393 mode: self.data.mode.get(),
399 mode: self.data.mode.get(),
394 mtime: self.data.mtime.get(),
400 mtime: self.data.mtime.get(),
395 size: self.data.size.get(),
401 size: self.data.size.get(),
396 }
402 }
397 }
403 }
398
404
399 pub(super) fn entry(
405 pub(super) fn entry(
400 &self,
406 &self,
401 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
407 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
402 Ok(self
408 Ok(self
403 .state()?
409 .state()?
404 .map(|state| self.entry_with_given_state(state)))
410 .map(|state| self.entry_with_given_state(state)))
405 }
411 }
406
412
407 pub(super) fn children<'on_disk>(
413 pub(super) fn children<'on_disk>(
408 &self,
414 &self,
409 on_disk: &'on_disk [u8],
415 on_disk: &'on_disk [u8],
410 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
416 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
411 read_slice::<Node>(on_disk, self.children)
417 read_nodes(on_disk, self.children)
412 }
418 }
413
419
414 pub(super) fn to_in_memory_node<'on_disk>(
420 pub(super) fn to_in_memory_node<'on_disk>(
415 &self,
421 &self,
416 on_disk: &'on_disk [u8],
422 on_disk: &'on_disk [u8],
417 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
423 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
418 Ok(dirstate_map::Node {
424 Ok(dirstate_map::Node {
419 children: dirstate_map::ChildNodes::OnDisk(
425 children: dirstate_map::ChildNodes::OnDisk(
420 self.children(on_disk)?,
426 self.children(on_disk)?,
421 ),
427 ),
422 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
428 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
423 data: self.node_data()?,
429 data: self.node_data()?,
424 descendants_with_entry_count: self
430 descendants_with_entry_count: self
425 .descendants_with_entry_count
431 .descendants_with_entry_count
426 .get(),
432 .get(),
427 tracked_descendants_count: self.tracked_descendants_count.get(),
433 tracked_descendants_count: self.tracked_descendants_count.get(),
428 })
434 })
429 }
435 }
430 }
436 }
431
437
432 impl Entry {
438 impl Entry {
433 fn from_timestamp(timestamp: Timestamp) -> Self {
439 fn from_timestamp(timestamp: Timestamp) -> Self {
434 // Safety: both types implement the `ByteCast` trait, so we could
440 // Safety: both types implement the `ByteCast` trait, so we could
435 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
441 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
436 // `transmute` instead makes the compiler check that the two types
442 // `transmute` instead makes the compiler check that the two types
437 // have the same size, which eliminates the error case of
443 // have the same size, which eliminates the error case of
438 // `from_bytes`.
444 // `from_bytes`.
439 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
445 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
440 }
446 }
441
447
442 fn as_timestamp(&self) -> &Timestamp {
448 fn as_timestamp(&self) -> &Timestamp {
443 // Safety: same as above in `from_timestamp`
449 // Safety: same as above in `from_timestamp`
444 unsafe { &*(self as *const Entry as *const Timestamp) }
450 unsafe { &*(self as *const Entry as *const Timestamp) }
445 }
451 }
446 }
452 }
447
453
448 impl Timestamp {
454 impl Timestamp {
449 pub fn seconds(&self) -> i64 {
455 pub fn seconds(&self) -> i64 {
450 self.seconds.get()
456 self.seconds.get()
451 }
457 }
452 }
458 }
453
459
454 impl From<SystemTime> for Timestamp {
460 impl From<SystemTime> for Timestamp {
455 fn from(system_time: SystemTime) -> Self {
461 fn from(system_time: SystemTime) -> Self {
456 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
462 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
457 Ok(duration) => {
463 Ok(duration) => {
458 (duration.as_secs() as i64, duration.subsec_nanos())
464 (duration.as_secs() as i64, duration.subsec_nanos())
459 }
465 }
460 Err(error) => {
466 Err(error) => {
461 let negative = error.duration();
467 let negative = error.duration();
462 (-(negative.as_secs() as i64), negative.subsec_nanos())
468 (-(negative.as_secs() as i64), negative.subsec_nanos())
463 }
469 }
464 };
470 };
465 Timestamp {
471 Timestamp {
466 seconds: secs.into(),
472 seconds: secs.into(),
467 nanoseconds: nanos.into(),
473 nanoseconds: nanos.into(),
468 }
474 }
469 }
475 }
470 }
476 }
471
477
472 impl From<&'_ Timestamp> for SystemTime {
478 impl From<&'_ Timestamp> for SystemTime {
473 fn from(timestamp: &'_ Timestamp) -> Self {
479 fn from(timestamp: &'_ Timestamp) -> Self {
474 let secs = timestamp.seconds.get();
480 let secs = timestamp.seconds.get();
475 let nanos = timestamp.nanoseconds.get();
481 let nanos = timestamp.nanoseconds.get();
476 if secs >= 0 {
482 if secs >= 0 {
477 UNIX_EPOCH + Duration::new(secs as u64, nanos)
483 UNIX_EPOCH + Duration::new(secs as u64, nanos)
478 } else {
484 } else {
479 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
485 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
480 }
486 }
481 }
487 }
482 }
488 }
483
489
484 fn read_hg_path(
490 fn read_hg_path(
485 on_disk: &[u8],
491 on_disk: &[u8],
486 slice: Slice,
492 slice: PathSlice,
487 ) -> Result<&HgPath, DirstateV2ParseError> {
493 ) -> Result<&HgPath, DirstateV2ParseError> {
488 let bytes = read_slice::<u8>(on_disk, slice)?;
494 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
489 Ok(HgPath::new(bytes))
490 }
495 }
491
496
492 fn read_slice<T>(
497 fn read_nodes(
493 on_disk: &[u8],
498 on_disk: &[u8],
494 slice: Slice,
499 slice: ChildNodes,
500 ) -> Result<&[Node], DirstateV2ParseError> {
501 read_slice(on_disk, slice.start, slice.len.get())
502 }
503
504 fn read_slice<T, Len>(
505 on_disk: &[u8],
506 start: Offset,
507 len: Len,
495 ) -> Result<&[T], DirstateV2ParseError>
508 ) -> Result<&[T], DirstateV2ParseError>
496 where
509 where
497 T: BytesCast,
510 T: BytesCast,
511 Len: TryInto<usize>,
498 {
512 {
499 // Either `usize::MAX` would result in "out of bounds" error since a single
513 // Either `usize::MAX` would result in "out of bounds" error since a single
500 // `&[u8]` cannot occupy the entire addess space.
514 // `&[u8]` cannot occupy the entire addess space.
501 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
515 let start = start.get().try_into().unwrap_or(std::usize::MAX);
502 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
516 let len = len.try_into().unwrap_or(std::usize::MAX);
503 on_disk
517 on_disk
504 .get(start..)
518 .get(start..)
505 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
519 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
506 .map(|(slice, _rest)| slice)
520 .map(|(slice, _rest)| slice)
507 .ok_or_else(|| DirstateV2ParseError)
521 .ok_or_else(|| DirstateV2ParseError)
508 }
522 }
509
523
510 pub(crate) fn for_each_tracked_path<'on_disk>(
524 pub(crate) fn for_each_tracked_path<'on_disk>(
511 on_disk: &'on_disk [u8],
525 on_disk: &'on_disk [u8],
512 mut f: impl FnMut(&'on_disk HgPath),
526 mut f: impl FnMut(&'on_disk HgPath),
513 ) -> Result<(), DirstateV2ParseError> {
527 ) -> Result<(), DirstateV2ParseError> {
514 let root = read_root(on_disk)?;
528 let root = read_root(on_disk)?;
515 fn recur<'on_disk>(
529 fn recur<'on_disk>(
516 on_disk: &'on_disk [u8],
530 on_disk: &'on_disk [u8],
517 nodes: Slice,
531 nodes: ChildNodes,
518 f: &mut impl FnMut(&'on_disk HgPath),
532 f: &mut impl FnMut(&'on_disk HgPath),
519 ) -> Result<(), DirstateV2ParseError> {
533 ) -> Result<(), DirstateV2ParseError> {
520 for node in read_slice::<Node>(on_disk, nodes)? {
534 for node in read_nodes(on_disk, nodes)? {
521 if let Some(state) = node.state()? {
535 if let Some(state) = node.state()? {
522 if state.is_tracked() {
536 if state.is_tracked() {
523 f(node.full_path(on_disk)?)
537 f(node.full_path(on_disk)?)
524 }
538 }
525 }
539 }
526 recur(on_disk, node.children, f)?
540 recur(on_disk, node.children, f)?
527 }
541 }
528 Ok(())
542 Ok(())
529 }
543 }
530 recur(on_disk, root.root_nodes, &mut f)
544 recur(on_disk, root.root_nodes, &mut f)
531 }
545 }
532
546
533 pub(super) fn write(
547 pub(super) fn write(
534 dirstate_map: &mut DirstateMap,
548 dirstate_map: &mut DirstateMap,
535 ) -> Result<Vec<u8>, DirstateError> {
549 ) -> Result<Vec<u8>, DirstateError> {
536 let root_len = std::mem::size_of::<Root>();
550 let root_len = std::mem::size_of::<Root>();
537
551
538 // This ignores the space for paths, and for nodes without an entry.
552 // This ignores the space for paths, and for nodes without an entry.
539 // TODO: better estimate? Skip the `Vec` and write to a file directly?
553 // TODO: better estimate? Skip the `Vec` and write to a file directly?
540 let size_guess = root_len
554 let size_guess = root_len
541 + std::mem::size_of::<Node>()
555 + std::mem::size_of::<Node>()
542 * dirstate_map.nodes_with_entry_count as usize;
556 * dirstate_map.nodes_with_entry_count as usize;
543 let mut out = Vec::with_capacity(size_guess);
557 let mut out = Vec::with_capacity(size_guess);
544
558
545 let root_nodes =
559 let root_nodes =
546 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
560 write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
547
561
548 let root = Root {
562 let root = Root {
549 root_nodes,
563 root_nodes,
550 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
564 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
551 nodes_with_copy_source_count: dirstate_map
565 nodes_with_copy_source_count: dirstate_map
552 .nodes_with_copy_source_count
566 .nodes_with_copy_source_count
553 .into(),
567 .into(),
554 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
568 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
555 };
569 };
556 out.extend(root.as_bytes());
570 out.extend(root.as_bytes());
557 Ok(out)
571 Ok(out)
558 }
572 }
559
573
560 fn write_nodes(
574 fn write_nodes(
561 dirstate_map: &DirstateMap,
575 dirstate_map: &DirstateMap,
562 nodes: dirstate_map::ChildNodesRef,
576 nodes: dirstate_map::ChildNodesRef,
563 out: &mut Vec<u8>,
577 out: &mut Vec<u8>,
564 ) -> Result<ChildNodes, DirstateError> {
578 ) -> Result<ChildNodes, DirstateError> {
565 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
566 // order. Sort to enable binary search in the written file.
580 // order. Sort to enable binary search in the written file.
567 let nodes = nodes.sorted();
581 let nodes = nodes.sorted();
582 let nodes_len = nodes.len();
568
583
569 // First accumulate serialized nodes in a `Vec`
584 // First accumulate serialized nodes in a `Vec`
570 let mut on_disk_nodes = Vec::with_capacity(nodes.len());
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
571 for node in nodes {
586 for node in nodes {
572 let children = write_nodes(
587 let children = write_nodes(
573 dirstate_map,
588 dirstate_map,
574 node.children(dirstate_map.on_disk)?,
589 node.children(dirstate_map.on_disk)?,
575 out,
590 out,
576 )?;
591 )?;
577 let full_path = node.full_path(dirstate_map.on_disk)?;
592 let full_path = node.full_path(dirstate_map.on_disk)?;
578 let full_path = write_slice::<u8>(full_path.as_bytes(), out);
593 let full_path = write_path(full_path.as_bytes(), out);
579 let copy_source =
594 let copy_source =
580 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
581 write_slice::<u8>(source.as_bytes(), out)
596 write_path(source.as_bytes(), out)
582 } else {
597 } else {
583 Slice {
598 PathSlice {
584 start: 0.into(),
599 start: 0.into(),
585 len: 0.into(),
600 len: 0.into(),
586 }
601 }
587 };
602 };
588 on_disk_nodes.push(match node {
603 on_disk_nodes.push(match node {
589 NodeRef::InMemory(path, node) => {
604 NodeRef::InMemory(path, node) => {
590 let (state, data) = match &node.data {
605 let (state, data) = match &node.data {
591 dirstate_map::NodeData::Entry(entry) => (
606 dirstate_map::NodeData::Entry(entry) => (
592 entry.state.into(),
607 entry.state.into(),
593 Entry {
608 Entry {
594 mode: entry.mode.into(),
609 mode: entry.mode.into(),
595 mtime: entry.mtime.into(),
610 mtime: entry.mtime.into(),
596 size: entry.size.into(),
611 size: entry.size.into(),
597 },
612 },
598 ),
613 ),
599 dirstate_map::NodeData::CachedDirectory { mtime } => {
614 dirstate_map::NodeData::CachedDirectory { mtime } => {
600 (b'd', Entry::from_timestamp(*mtime))
615 (b'd', Entry::from_timestamp(*mtime))
601 }
616 }
602 dirstate_map::NodeData::None => (
617 dirstate_map::NodeData::None => (
603 b'\0',
618 b'\0',
604 Entry {
619 Entry {
605 mode: 0.into(),
620 mode: 0.into(),
606 mtime: 0.into(),
621 mtime: 0.into(),
607 size: 0.into(),
622 size: 0.into(),
608 },
623 },
609 ),
624 ),
610 };
625 };
611 Node {
626 Node {
612 children,
627 children,
613 copy_source,
628 copy_source,
614 full_path,
629 full_path,
615 base_name_start: u32::try_from(path.base_name_start())
630 base_name_start: u16::try_from(path.base_name_start())
616 // Could only panic for paths over 4 GiB
631 // Could only panic for paths over 64 KiB
617 .expect("dirstate-v2 offset overflow")
632 .expect("dirstate-v2 path length overflow")
618 .into(),
633 .into(),
619 descendants_with_entry_count: node
634 descendants_with_entry_count: node
620 .descendants_with_entry_count
635 .descendants_with_entry_count
621 .into(),
636 .into(),
622 tracked_descendants_count: node
637 tracked_descendants_count: node
623 .tracked_descendants_count
638 .tracked_descendants_count
624 .into(),
639 .into(),
625 state,
640 state,
626 data,
641 data,
627 }
642 }
628 }
643 }
629 NodeRef::OnDisk(node) => Node {
644 NodeRef::OnDisk(node) => Node {
630 children,
645 children,
631 copy_source,
646 copy_source,
632 full_path,
647 full_path,
633 ..*node
648 ..*node
634 },
649 },
635 })
650 })
636 }
651 }
637 // … so we can write them contiguously
652 // … so we can write them contiguously, after writing everything else they
638 Ok(write_slice::<Node>(&on_disk_nodes, out))
653 // refer to.
654 let start = current_offset(out);
655 let len = u32::try_from(nodes_len)
656 // Could only panic with over 4 billion nodes
657 .expect("dirstate-v2 path length overflow")
658 .into();
659 out.extend(on_disk_nodes.as_bytes());
660 Ok(ChildNodes { start, len })
639 }
661 }
640
662
641 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
663 fn current_offset(out: &Vec<u8>) -> Offset {
642 where
664 u32::try_from(out.len())
643 T: BytesCast,
644 {
645 let start = u32::try_from(out.len())
646 // Could only panic for a dirstate file larger than 4 GiB
665 // Could only panic for a dirstate file larger than 4 GiB
647 .expect("dirstate-v2 offset overflow")
666 .expect("dirstate-v2 offset overflow")
648 .into();
667 .into()
649 let len = u32::try_from(slice.len())
668 }
650 // Could only panic for paths over 4 GiB or nodes with over 4 billions
669
651 // child nodes
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
652 .expect("dirstate-v2 offset overflow")
671 let start = current_offset(out);
672 let len = u16::try_from(slice.len())
673 // Could only panic for paths over 64 KiB
674 .expect("dirstate-v2 path length overflow")
653 .into();
675 .into();
654 out.extend(slice.as_bytes());
676 out.extend(slice.as_bytes());
655 Slice { start, len }
677 PathSlice { start, len }
656 }
678 }
General Comments 0
You need to be logged in to leave comments. Login now