rust-dirstate: trace append/no append to help debugging
Raphaël Gomès
r51074:f2e13d8d stable
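This changeset adds two `log::trace!` calls at the top of the `write()` function below, so the log records whether a dirstate write appended to the existing data file or created a new one. A minimal sketch of how that trace output could be surfaced, assuming an `env_logger`-style backend wired up by the caller (not something this changeset sets up):

    // Hypothetical harness, not part of this change: install a logger that
    // lets Trace-level records through, then anything using the `log` facade
    // (such as the new trace calls in `write()`) becomes visible.
    fn main() {
        env_logger::builder()
            .filter_level(log::LevelFilter::Trace)
            .init();
        log::trace!("appending to the dirstate data file");
    }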
@@ -1,878 +1,883 @@
//! The "version 2" disk representation of the dirstate
//!
//! See `mercurial/helptext/internals/dirstate-v2.txt`

use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
use crate::dirstate_tree::dirstate_map::DirstateVersion;
use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
use bitflags::bitflags;
use bytes_cast::unaligned::{U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use rand::Rng;
use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::fmt::Write;

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;

/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    // Each of these `transmute`s only compiles if the two types have the
    // same size, so a size mismatch is caught at build time.
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    uuid_size: u8,
}

pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    pub uuid: &'on_disk [u8],
}

/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    unreachable_bytes: Size,
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}

/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    flags: U16Be,
    size: U32Be,
    mtime: PackedTruncatedTimestamp,
}

bitflags! {
    #[repr(C)]
    struct Flags: u16 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const MODE_EXEC_PERM = 1 << 3;
        const MODE_IS_SYMLINK = 1 << 4;
        const HAS_FALLBACK_EXEC = 1 << 5;
        const FALLBACK_EXEC = 1 << 6;
        const HAS_FALLBACK_SYMLINK = 1 << 7;
        const FALLBACK_SYMLINK = 1 << 8;
        const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
        const HAS_MODE_AND_SIZE = 1 << 10;
        const HAS_MTIME = 1 << 11;
        const MTIME_SECOND_AMBIGUOUS = 1 << 12;
        const DIRECTORY = 1 << 13;
        const ALL_UNKNOWN_RECORDED = 1 << 14;
        const ALL_IGNORED_RECORDED = 1 << 15;
    }
}
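// For example, a file that is clean and tracked both in the working copy and
// in p1 typically has WDIR_TRACKED | P1_TRACKED | HAS_MODE_AND_SIZE |
// HAS_MTIME set, while a file removed with `hg rm` (still in p1, no longer
// tracked in the working copy) typically keeps only P1_TRACKED of the above.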

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` `Node`s, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent node also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    message: String,
}

impl DirstateV2ParseError {
    pub fn new<S: Into<String>>(message: S) -> Self {
        Self {
            message: message.into(),
        }
    }
}

impl From<DirstateV2ParseError> for HgError {
    fn from(e: DirstateV2ParseError) -> Self {
        HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl TreeMetadata {
    pub fn as_bytes(&self) -> &[u8] {
        BytesCast::as_bytes(self)
    }
}

impl<'on_disk> Docket<'on_disk> {
    /// Generate the identifier for a new data file
    ///
    /// TODO: support the `HGTEST_UUIDFILE` environment variable.
    /// See `mercurial/revlogutils/docket.py`
    pub fn new_uid() -> String {
        const ID_LENGTH: usize = 8;
        let mut id = String::with_capacity(ID_LENGTH);
        let mut rng = rand::thread_rng();
        for _ in 0..ID_LENGTH {
            // One random hexadecimal digit.
            // `unwrap` never panics because `impl Write for String`
            // never returns an error.
            write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
        }
        id
    }

    pub fn serialize(
        parents: DirstateParents,
        tree_metadata: TreeMetadata,
        data_size: u64,
        uuid: &[u8],
    ) -> Result<Vec<u8>, std::num::TryFromIntError> {
        let header = DocketHeader {
            marker: *V2_FORMAT_MARKER,
            parent_1: parents.p1.pad_to_256_bits(),
            parent_2: parents.p2.pad_to_256_bits(),
            metadata: tree_metadata,
            data_size: u32::try_from(data_size)?.into(),
            uuid_size: uuid.len().try_into()?,
        };
        let header = header.as_bytes();
        let mut docket = Vec::with_capacity(header.len() + uuid.len());
        docket.extend_from_slice(header);
        docket.extend_from_slice(uuid);
        Ok(docket)
    }

    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}

pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
        DirstateV2ParseError::new(format!("when reading docket, {}", e))
    })?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError::new(
            "invalid format marker or uuid size",
        ))
    }
}

pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        let mut map = DirstateMap::empty(on_disk);
        map.dirstate_version = DirstateVersion::V2;
        return Ok(map);
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
        DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
    })?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(
            read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
                e.message = format!("{}, when reading root nodes", e.message);
                e
            })?,
        ),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
        old_data_size: on_disk.len(),
        dirstate_version: DirstateVersion::V2,
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u32 -> usize, could only panic on a 16-bit CPU
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError::new("not enough bytes for base name"))
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    fn flags(&self) -> Flags {
        Flags::from_bits_truncate(self.flags.get())
    }

    fn has_entry(&self) -> bool {
        self.flags().intersects(
            Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
        )
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
        } else if let Some(mtime) = self.cached_directory_mtime()? {
            Ok(dirstate_map::NodeData::CachedDirectory { mtime })
        } else {
            Ok(dirstate_map::NodeData::None)
        }
    }

    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        // For now we do not have code to handle the absence of
        // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
        // unset.
        if self.flags().contains(Flags::DIRECTORY)
            && self.flags().contains(Flags::HAS_MTIME)
            && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
        {
            Ok(Some(self.mtime()?))
        } else {
            Ok(None)
        }
    }

    fn synthesize_unix_mode(&self) -> u32 {
        let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
            libc::S_IFLNK
        } else {
            libc::S_IFREG
        };
        let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
            0o755
        } else {
            0o644
        };
        (file_type | permissions).into()
    }

    fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
        let mut m: TruncatedTimestamp = self.mtime.try_into()?;
        if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
            m.second_ambiguous = true;
        }
        Ok(m)
    }

    fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
        // TODO: convert through raw bits instead?
        let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
        let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
        let p2_info = self.flags().contains(Flags::P2_INFO);
        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some((self.synthesize_unix_mode(), self.size.into()))
        } else {
            None
        };
        let mtime = if self.flags().contains(Flags::HAS_MTIME)
            && !self.flags().contains(Flags::DIRECTORY)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some(self.mtime()?)
        } else {
            None
        };
        let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
        {
            Some(self.flags().contains(Flags::FALLBACK_EXEC))
        } else {
            None
        };
        let fallback_symlink =
            if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
                Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
            } else {
                None
            };
        Ok(DirstateEntry::from_v2_data(DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size,
            mtime,
            fallback_exec,
            fallback_symlink,
        }))
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(Some(self.assume_entry()?))
        } else {
            Ok(None)
        }
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }

    fn from_dirstate_entry(
        entry: &DirstateEntry,
    ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
        let DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size: mode_size_opt,
            mtime: mtime_opt,
            fallback_exec,
            fallback_symlink,
        } = entry.v2_data();
        // TODO: convert through raw flag bits instead?
        let mut flags = Flags::empty();
        flags.set(Flags::WDIR_TRACKED, wc_tracked);
        flags.set(Flags::P1_TRACKED, p1_tracked);
        flags.set(Flags::P2_INFO, p2_info);
        let size = if let Some((m, s)) = mode_size_opt {
            let exec_perm = m & (libc::S_IXUSR as u32) != 0;
            let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
            flags.insert(Flags::HAS_MODE_AND_SIZE);
            s.into()
        } else {
            0.into()
        };
        let mtime = if let Some(m) = mtime_opt {
            flags.insert(Flags::HAS_MTIME);
            if m.second_ambiguous {
                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
            };
            m.into()
        } else {
            PackedTruncatedTimestamp::null()
        };
        if let Some(f_exec) = fallback_exec {
            flags.insert(Flags::HAS_FALLBACK_EXEC);
            if f_exec {
                flags.insert(Flags::FALLBACK_EXEC);
            }
        }
        if let Some(f_symlink) = fallback_symlink {
            flags.insert(Flags::HAS_FALLBACK_SYMLINK);
            if f_symlink {
                flags.insert(Flags::FALLBACK_SYMLINK);
            }
        }
        (flags, size, mtime)
    }
}

fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either value overflowing to `usize::MAX` would result in an
    // "out of bounds" error, since a single `&[u8]` cannot occupy the
    // entire address space.
    let start = start.get().try_into().unwrap_or(std::usize::MAX);
    let len = len.try_into().unwrap_or(std::usize::MAX);
    let bytes = match on_disk.get(start..) {
        Some(bytes) => bytes,
        None => {
            return Err(DirstateV2ParseError::new(
                "not enough bytes from disk",
            ))
        }
    };
    T::slice_from_bytes(bytes, len)
        .map_err(|e| {
            DirstateV2ParseError::new(format!("when reading a slice, {}", e))
        })
        .map(|(slice, _rest)| slice)
}

pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
        DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
    })?;
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(entry) = node.entry()? {
                if entry.tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}

/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false), and the previous size of data on disk.
pub(super) fn write(
    dirstate_map: &DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();
    if append {
        log::trace!("appending to the dirstate data file");
    } else {
        log::trace!("creating new dirstate data file");
    }

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let unreachable_bytes = if append {
        dirstate_map.unreachable_bytes
    } else {
        0
    };
    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta, append, dirstate_map.old_data_size))
}

struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, size, mtime) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Node::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            // we currently never set an mtime if unknown
                            // files are present. So if we have an mtime for
                            // a directory, we know there are no unknown
                            // files and we blindly set ALL_UNKNOWN_RECORDED.
                            //
                            // We never set ALL_IGNORED_RECORDED since we
                            // don't track that case currently.
                            let mut flags = Flags::DIRECTORY
                                | Flags::HAS_MTIME
                                | Flags::ALL_UNKNOWN_RECORDED;
                            if mtime.second_ambiguous {
                                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
                            }
                            (flags, 0.into(), (*mtime).into())
                        }
                        dirstate_map::NodeData::None => (
                            Flags::DIRECTORY,
                            0.into(),
                            PackedTruncatedTimestamp::null(),
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags: flags.bits().into(),
                        size,
                        mtime,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}

impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
    fn from(timestamp: TruncatedTimestamp) -> Self {
        Self {
            truncated_seconds: timestamp.truncated_seconds().into(),
            nanoseconds: timestamp.nanoseconds().into(),
        }
    }
}

impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
    type Error = DirstateV2ParseError;

    fn try_from(
        timestamp: PackedTruncatedTimestamp,
    ) -> Result<Self, Self::Error> {
        Self::from_already_truncated(
            timestamp.truncated_seconds.get(),
            timestamp.nanoseconds.get(),
            false,
        )
    }
}
impl PackedTruncatedTimestamp {
    fn null() -> Self {
        Self {
            truncated_seconds: 0.into(),
            nanoseconds: 0.into(),
        }
    }
}
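For context, the docket parsed by `read_docket` above lives in `.hg/dirstate` and merely points at the data file that `read` consumes. A minimal sketch of how a caller could combine the two, assuming a hypothetical `read_file` closure for the I/O (the real callers, with mmap and error handling, live elsewhere in hg-core):

    // Hypothetical glue code, for illustration only.
    fn load_v2(
        dirstate_bytes: &[u8],
        read_file: impl Fn(&str) -> Vec<u8>,
    ) -> Result<(), DirstateError> {
        let docket = read_docket(dirstate_bytes)?;
        // `dirstate.{uuid}` holds the actual tree; only the first
        // `data_size()` bytes are meaningful, later bytes may be leftovers
        // from an interrupted append.
        let data_file = read_file(&docket.data_filename());
        let data = &data_file[..docket.data_size()];
        let _map = read(data, docket.tree_metadata())?;
        Ok(())
    }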