##// END OF EJS Templates
dirstate-v2: check that root nodes are at the root before writing...
Raphaël Gomès -
r52510:f808fa11 stable
parent child Browse files
Show More
@@ -1,902 +1,915
1 //! The "version 2" disk representation of the dirstate
1 //! The "version 2" disk representation of the dirstate
2 //!
2 //!
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
3 //! See `mercurial/helptext/internals/dirstate-v2.txt`
4
4
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
5 use crate::dirstate::{DirstateV2Data, TruncatedTimestamp};
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
6 use crate::dirstate_tree::dirstate_map::DirstateVersion;
7 use crate::dirstate_tree::dirstate_map::{
7 use crate::dirstate_tree::dirstate_map::{
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
8 self, DirstateMap, DirstateMapWriteMode, NodeRef,
9 };
9 };
10 use crate::dirstate_tree::path_with_basename::WithBasename;
10 use crate::dirstate_tree::path_with_basename::WithBasename;
11 use crate::errors::HgError;
11 use crate::errors::HgError;
12 use crate::utils::hg_path::HgPath;
12 use crate::utils::hg_path::HgPath;
13 use crate::DirstateEntry;
13 use crate::DirstateEntry;
14 use crate::DirstateError;
14 use crate::DirstateError;
15 use crate::DirstateParents;
15 use crate::DirstateParents;
16 use bitflags::bitflags;
16 use bitflags::bitflags;
17 use bytes_cast::unaligned::{U16Be, U32Be};
17 use bytes_cast::unaligned::{U16Be, U32Be};
18 use bytes_cast::BytesCast;
18 use bytes_cast::BytesCast;
19 use format_bytes::format_bytes;
19 use format_bytes::format_bytes;
20 use rand::Rng;
20 use rand::Rng;
21 use std::borrow::Cow;
21 use std::borrow::Cow;
22 use std::fmt::Write;
22 use std::fmt::Write;
23
23
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

/// Length in bytes of the ignore patterns hash kept in `TreeMetadata`
pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
/// Fixed-size byte array holding the ignore patterns hash
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;
41
41
/// Make sure that size-affecting changes are made knowingly
///
/// This function is never meant to be called. Each statement only names a
/// `std::mem::transmute` instantiation between an on-disk struct and a byte
/// array of its expected size; `transmute` requires both types to have the
/// same size, so a layout change that alters a struct's size is intended to
/// be rejected at compile time.
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    // 81 = 12 (marker) + 2 * 32 (parents) + 4 (data_size) + 1 (uuid_size)
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}
49
49
// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
//
// Fixed-size prefix of the docket. `read_docket` expects it to be followed
// by exactly `uuid_size` bytes holding the uuid of the data file.
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    /// Expected to be equal to `V2_FORMAT_MARKER`
    marker: [u8; V2_FORMAT_MARKER.len()],
    /// First parent, stored in `STORED_NODE_ID_BYTES` bytes of which only
    /// the first `USED_NODE_ID_BYTES` are read back
    parent_1: [u8; STORED_NODE_ID_BYTES],
    /// Second parent, stored the same way as `parent_1`
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Tree metadata, embedded as-is in the docket
    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    /// Length in bytes of the uuid that follows this header
    uuid_size: u8,
}
65
65
/// Parsed view of a docket, borrowing from the bytes it was read from.
///
/// Built by `read_docket`.
pub struct Docket<'on_disk> {
    /// Fixed-size header found at the start of the docket
    header: &'on_disk DocketHeader,
    /// Bytes following the header; `data_filename` appends them to
    /// `dirstate.` to name the data file
    pub uuid: &'on_disk [u8],
}
70
70
/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
    /// Where the nodes at the root of the tree are found in the data file
    root_nodes: ChildNodes,
    /// Copied into the `DirstateMap` field of the same name when reading
    nodes_with_entry_count: Size,
    /// Copied into the `DirstateMap` field of the same name when reading
    nodes_with_copy_source_count: Size,
    /// Copied into the `DirstateMap` field of the same name when reading
    unreachable_bytes: Size,
    /// Not read by the code in this module
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}
86
86
/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
    /// Where the full path of this node is found in the data file
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    /// Where the copy source is found; a `start` of zero means "none"
    copy_source: OptPathSlice,
    /// Where the child nodes of this node are found in the data file
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    /// Raw bits, decoded through `Flags::from_bits_truncate`
    flags: U16Be,
    /// Only used for an entry when `Flags::HAS_MODE_AND_SIZE` is set
    size: U32Be,
    /// Only used when `Flags::HAS_MTIME` is set
    mtime: PackedTruncatedTimestamp,
}
105
105
// Meaning of each bit of the on-disk `Node::flags` field.
//
// A node is treated as having a dirstate entry when any of `WDIR_TRACKED`,
// `P1_TRACKED` or `P2_INFO` is set (see `Node::has_entry`). The bit
// positions are part of the file format and must not be renumbered.
bitflags! {
    #[repr(C)]
    struct Flags: u16 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const MODE_EXEC_PERM = 1 << 3;
        const MODE_IS_SYMLINK = 1 << 4;
        const HAS_FALLBACK_EXEC = 1 << 5;
        const FALLBACK_EXEC = 1 << 6;
        const HAS_FALLBACK_SYMLINK = 1 << 7;
        const FALLBACK_SYMLINK = 1 << 8;
        const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
        const HAS_MODE_AND_SIZE = 1 << 10;
        const HAS_MTIME = 1 << 11;
        const MTIME_SECOND_AMBIGUOUS = 1 << 12;
        const DIRECTORY = 1 << 13;
        const ALL_UNKNOWN_RECORDED = 1 << 14;
        const ALL_IGNORED_RECORDED = 1 << 15;
    }
}
127
127
/// Duration since the Unix epoch
///
/// On-disk form of a timestamp: two big-endian 32-bit integers. It is
/// converted to a `TruncatedTimestamp` with `try_into` when a node is read.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}
135
135
/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
///
/// `DocketHeader::data_size` also uses this alias, but counts bytes.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;
150
150
/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
    /// Where the first `Node` of the sequence is found
    start: Offset,
    /// Number of `Node`s in the sequence
    len: Size,
}
163
163
/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
    /// Where the first byte of the path is found
    start: Offset,
    /// Length of the path, in bytes
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
///
/// This is only an alias: the "nothing" case is a convention checked by the
/// reader (see `Node::has_copy_source`), not something the type enforces.
type OptPathSlice = PathSlice;
174
174
/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    /// Human-readable description of what went wrong; included in the
    /// message of the `HgError` this error converts into
    message: String,
}
182
182
183 impl DirstateV2ParseError {
183 impl DirstateV2ParseError {
184 pub fn new<S: Into<String>>(message: S) -> Self {
184 pub fn new<S: Into<String>>(message: S) -> Self {
185 Self {
185 Self {
186 message: message.into(),
186 message: message.into(),
187 }
187 }
188 }
188 }
189 }
189 }
190
190
191 impl From<DirstateV2ParseError> for HgError {
191 impl From<DirstateV2ParseError> for HgError {
192 fn from(e: DirstateV2ParseError) -> Self {
192 fn from(e: DirstateV2ParseError) -> Self {
193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
193 HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
194 }
194 }
195 }
195 }
196
196
197 impl From<DirstateV2ParseError> for crate::DirstateError {
197 impl From<DirstateV2ParseError> for crate::DirstateError {
198 fn from(error: DirstateV2ParseError) -> Self {
198 fn from(error: DirstateV2ParseError) -> Self {
199 HgError::from(error).into()
199 HgError::from(error).into()
200 }
200 }
201 }
201 }
202
202
203 impl TreeMetadata {
203 impl TreeMetadata {
204 pub fn as_bytes(&self) -> &[u8] {
204 pub fn as_bytes(&self) -> &[u8] {
205 BytesCast::as_bytes(self)
205 BytesCast::as_bytes(self)
206 }
206 }
207 }
207 }
208
208
209 impl<'on_disk> Docket<'on_disk> {
209 impl<'on_disk> Docket<'on_disk> {
210 /// Generate the identifier for a new data file
210 /// Generate the identifier for a new data file
211 ///
211 ///
212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
212 /// TODO: support the `HGTEST_UUIDFILE` environment variable.
213 /// See `mercurial/revlogutils/docket.py`
213 /// See `mercurial/revlogutils/docket.py`
214 pub fn new_uid() -> String {
214 pub fn new_uid() -> String {
215 const ID_LENGTH: usize = 8;
215 const ID_LENGTH: usize = 8;
216 let mut id = String::with_capacity(ID_LENGTH);
216 let mut id = String::with_capacity(ID_LENGTH);
217 let mut rng = rand::thread_rng();
217 let mut rng = rand::thread_rng();
218 for _ in 0..ID_LENGTH {
218 for _ in 0..ID_LENGTH {
219 // One random hexadecimal digit.
219 // One random hexadecimal digit.
220 // `unwrap` never panics because `impl Write for String`
220 // `unwrap` never panics because `impl Write for String`
221 // never returns an error.
221 // never returns an error.
222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
222 write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
223 }
223 }
224 id
224 id
225 }
225 }
226
226
227 pub fn serialize(
227 pub fn serialize(
228 parents: DirstateParents,
228 parents: DirstateParents,
229 tree_metadata: TreeMetadata,
229 tree_metadata: TreeMetadata,
230 data_size: u64,
230 data_size: u64,
231 uuid: &[u8],
231 uuid: &[u8],
232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
232 ) -> Result<Vec<u8>, std::num::TryFromIntError> {
233 let header = DocketHeader {
233 let header = DocketHeader {
234 marker: *V2_FORMAT_MARKER,
234 marker: *V2_FORMAT_MARKER,
235 parent_1: parents.p1.pad_to_256_bits(),
235 parent_1: parents.p1.pad_to_256_bits(),
236 parent_2: parents.p2.pad_to_256_bits(),
236 parent_2: parents.p2.pad_to_256_bits(),
237 metadata: tree_metadata,
237 metadata: tree_metadata,
238 data_size: u32::try_from(data_size)?.into(),
238 data_size: u32::try_from(data_size)?.into(),
239 uuid_size: uuid.len().try_into()?,
239 uuid_size: uuid.len().try_into()?,
240 };
240 };
241 let header = header.as_bytes();
241 let header = header.as_bytes();
242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
242 let mut docket = Vec::with_capacity(header.len() + uuid.len());
243 docket.extend_from_slice(header);
243 docket.extend_from_slice(header);
244 docket.extend_from_slice(uuid);
244 docket.extend_from_slice(uuid);
245 Ok(docket)
245 Ok(docket)
246 }
246 }
247
247
248 pub fn parents(&self) -> DirstateParents {
248 pub fn parents(&self) -> DirstateParents {
249 use crate::Node;
249 use crate::Node;
250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
250 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
251 .unwrap();
251 .unwrap();
252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
252 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
253 .unwrap();
253 .unwrap();
254 DirstateParents { p1, p2 }
254 DirstateParents { p1, p2 }
255 }
255 }
256
256
257 pub fn tree_metadata(&self) -> &[u8] {
257 pub fn tree_metadata(&self) -> &[u8] {
258 self.header.metadata.as_bytes()
258 self.header.metadata.as_bytes()
259 }
259 }
260
260
261 pub fn data_size(&self) -> usize {
261 pub fn data_size(&self) -> usize {
262 // This `unwrap` could only panic on a 16-bit CPU
262 // This `unwrap` could only panic on a 16-bit CPU
263 self.header.data_size.get().try_into().unwrap()
263 self.header.data_size.get().try_into().unwrap()
264 }
264 }
265
265
266 pub fn data_filename(&self) -> String {
266 pub fn data_filename(&self) -> String {
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
267 String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
268 }
268 }
269 }
269 }
270
270
271 pub fn read_docket(
271 pub fn read_docket(
272 on_disk: &[u8],
272 on_disk: &[u8],
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
273 ) -> Result<Docket<'_>, DirstateV2ParseError> {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
274 let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
275 DirstateV2ParseError::new(format!("when reading docket, {}", e))
276 })?;
276 })?;
277 let uuid_size = header.uuid_size as usize;
277 let uuid_size = header.uuid_size as usize;
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
278 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
279 Ok(Docket { header, uuid })
279 Ok(Docket { header, uuid })
280 } else {
280 } else {
281 Err(DirstateV2ParseError::new(
281 Err(DirstateV2ParseError::new(
282 "invalid format marker or uuid size",
282 "invalid format marker or uuid size",
283 ))
283 ))
284 }
284 }
285 }
285 }
286
286
287 pub(super) fn read<'on_disk>(
287 pub(super) fn read<'on_disk>(
288 on_disk: &'on_disk [u8],
288 on_disk: &'on_disk [u8],
289 metadata: &[u8],
289 metadata: &[u8],
290 uuid: Vec<u8>,
290 uuid: Vec<u8>,
291 identity: Option<u64>,
291 identity: Option<u64>,
292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
292 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
293 if on_disk.is_empty() {
293 if on_disk.is_empty() {
294 let mut map = DirstateMap::empty(on_disk);
294 let mut map = DirstateMap::empty(on_disk);
295 map.dirstate_version = DirstateVersion::V2;
295 map.dirstate_version = DirstateVersion::V2;
296 return Ok(map);
296 return Ok(map);
297 }
297 }
298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
298 let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
299 DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
300 })?;
300 })?;
301 let dirstate_map = DirstateMap {
301 let dirstate_map = DirstateMap {
302 on_disk,
302 on_disk,
303 root: dirstate_map::ChildNodes::OnDisk(
303 root: dirstate_map::ChildNodes::OnDisk(
304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
304 read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
305 e.message = format!("{}, when reading root notes", e.message);
305 e.message = format!("{}, when reading root notes", e.message);
306 e
306 e
307 })?,
307 })?,
308 ),
308 ),
309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
309 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
310 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
311 ignore_patterns_hash: meta.ignore_patterns_hash,
311 ignore_patterns_hash: meta.ignore_patterns_hash,
312 unreachable_bytes: meta.unreachable_bytes.get(),
312 unreachable_bytes: meta.unreachable_bytes.get(),
313 old_data_size: on_disk.len(),
313 old_data_size: on_disk.len(),
314 old_uuid: Some(uuid),
314 old_uuid: Some(uuid),
315 identity,
315 identity,
316 dirstate_version: DirstateVersion::V2,
316 dirstate_version: DirstateVersion::V2,
317 write_mode: DirstateMapWriteMode::Auto,
317 write_mode: DirstateMapWriteMode::Auto,
318 };
318 };
319 Ok(dirstate_map)
319 Ok(dirstate_map)
320 }
320 }
321
321
322 impl Node {
322 impl Node {
323 pub(super) fn full_path<'on_disk>(
323 pub(super) fn full_path<'on_disk>(
324 &self,
324 &self,
325 on_disk: &'on_disk [u8],
325 on_disk: &'on_disk [u8],
326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
326 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
327 read_hg_path(on_disk, self.full_path)
327 read_hg_path(on_disk, self.full_path)
328 }
328 }
329
329
330 pub(super) fn base_name_start(
330 pub(super) fn base_name_start(
331 &self,
331 &self,
332 ) -> Result<usize, DirstateV2ParseError> {
332 ) -> Result<usize, DirstateV2ParseError> {
333 let start = self.base_name_start.get();
333 let start = self.base_name_start.get();
334 if start < self.full_path.len.get() {
334 if start < self.full_path.len.get() {
335 let start = usize::try_from(start)
335 let start = usize::try_from(start)
336 // u32 -> usize, could only panic on a 16-bit CPU
336 // u32 -> usize, could only panic on a 16-bit CPU
337 .expect("dirstate-v2 base_name_start out of bounds");
337 .expect("dirstate-v2 base_name_start out of bounds");
338 Ok(start)
338 Ok(start)
339 } else {
339 } else {
340 Err(DirstateV2ParseError::new("not enough bytes for base name"))
340 Err(DirstateV2ParseError::new("not enough bytes for base name"))
341 }
341 }
342 }
342 }
343
343
344 pub(super) fn base_name<'on_disk>(
344 pub(super) fn base_name<'on_disk>(
345 &self,
345 &self,
346 on_disk: &'on_disk [u8],
346 on_disk: &'on_disk [u8],
347 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
347 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
348 let full_path = self.full_path(on_disk)?;
348 let full_path = self.full_path(on_disk)?;
349 let base_name_start = self.base_name_start()?;
349 let base_name_start = self.base_name_start()?;
350 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
350 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
351 }
351 }
352
352
353 pub(super) fn path<'on_disk>(
353 pub(super) fn path<'on_disk>(
354 &self,
354 &self,
355 on_disk: &'on_disk [u8],
355 on_disk: &'on_disk [u8],
356 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
356 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
357 Ok(WithBasename::from_raw_parts(
357 Ok(WithBasename::from_raw_parts(
358 Cow::Borrowed(self.full_path(on_disk)?),
358 Cow::Borrowed(self.full_path(on_disk)?),
359 self.base_name_start()?,
359 self.base_name_start()?,
360 ))
360 ))
361 }
361 }
362
362
363 pub(super) fn has_copy_source(&self) -> bool {
363 pub(super) fn has_copy_source(&self) -> bool {
364 self.copy_source.start.get() != 0
364 self.copy_source.start.get() != 0
365 }
365 }
366
366
367 pub(super) fn copy_source<'on_disk>(
367 pub(super) fn copy_source<'on_disk>(
368 &self,
368 &self,
369 on_disk: &'on_disk [u8],
369 on_disk: &'on_disk [u8],
370 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
370 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
371 Ok(if self.has_copy_source() {
371 Ok(if self.has_copy_source() {
372 Some(read_hg_path(on_disk, self.copy_source)?)
372 Some(read_hg_path(on_disk, self.copy_source)?)
373 } else {
373 } else {
374 None
374 None
375 })
375 })
376 }
376 }
377
377
378 fn flags(&self) -> Flags {
378 fn flags(&self) -> Flags {
379 Flags::from_bits_truncate(self.flags.get())
379 Flags::from_bits_truncate(self.flags.get())
380 }
380 }
381
381
382 fn has_entry(&self) -> bool {
382 fn has_entry(&self) -> bool {
383 self.flags().intersects(
383 self.flags().intersects(
384 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
384 Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
385 )
385 )
386 }
386 }
387
387
388 pub(super) fn node_data(
388 pub(super) fn node_data(
389 &self,
389 &self,
390 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
390 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
391 if self.has_entry() {
391 if self.has_entry() {
392 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
392 Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
393 } else if let Some(mtime) = self.cached_directory_mtime()? {
393 } else if let Some(mtime) = self.cached_directory_mtime()? {
394 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
394 Ok(dirstate_map::NodeData::CachedDirectory { mtime })
395 } else {
395 } else {
396 Ok(dirstate_map::NodeData::None)
396 Ok(dirstate_map::NodeData::None)
397 }
397 }
398 }
398 }
399
399
400 pub(super) fn cached_directory_mtime(
400 pub(super) fn cached_directory_mtime(
401 &self,
401 &self,
402 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
402 ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
403 // For now we do not have code to handle the absence of
403 // For now we do not have code to handle the absence of
404 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
404 // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
405 // unset.
405 // unset.
406 if self.flags().contains(Flags::DIRECTORY)
406 if self.flags().contains(Flags::DIRECTORY)
407 && self.flags().contains(Flags::HAS_MTIME)
407 && self.flags().contains(Flags::HAS_MTIME)
408 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
408 && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
409 {
409 {
410 Ok(Some(self.mtime()?))
410 Ok(Some(self.mtime()?))
411 } else {
411 } else {
412 Ok(None)
412 Ok(None)
413 }
413 }
414 }
414 }
415
415
416 fn synthesize_unix_mode(&self) -> u32 {
416 fn synthesize_unix_mode(&self) -> u32 {
417 // Some platforms' libc don't have the same type (MacOS uses i32 here)
417 // Some platforms' libc don't have the same type (MacOS uses i32 here)
418 #[allow(clippy::unnecessary_cast)]
418 #[allow(clippy::unnecessary_cast)]
419 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
419 let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
420 libc::S_IFLNK as u32
420 libc::S_IFLNK as u32
421 } else {
421 } else {
422 libc::S_IFREG as u32
422 libc::S_IFREG as u32
423 };
423 };
424 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
424 let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
425 0o755
425 0o755
426 } else {
426 } else {
427 0o644
427 0o644
428 };
428 };
429 file_type | permissions
429 file_type | permissions
430 }
430 }
431
431
432 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
432 fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
433 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
433 let mut m: TruncatedTimestamp = self.mtime.try_into()?;
434 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
434 if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
435 m.second_ambiguous = true;
435 m.second_ambiguous = true;
436 }
436 }
437 Ok(m)
437 Ok(m)
438 }
438 }
439
439
440 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
440 fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
441 // TODO: convert through raw bits instead?
441 // TODO: convert through raw bits instead?
442 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
442 let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
443 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
443 let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
444 let p2_info = self.flags().contains(Flags::P2_INFO);
444 let p2_info = self.flags().contains(Flags::P2_INFO);
445 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
445 let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
446 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
446 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
447 {
447 {
448 Some((self.synthesize_unix_mode(), self.size.into()))
448 Some((self.synthesize_unix_mode(), self.size.into()))
449 } else {
449 } else {
450 None
450 None
451 };
451 };
452 let mtime = if self.flags().contains(Flags::HAS_MTIME)
452 let mtime = if self.flags().contains(Flags::HAS_MTIME)
453 && !self.flags().contains(Flags::DIRECTORY)
453 && !self.flags().contains(Flags::DIRECTORY)
454 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
454 && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
455 {
455 {
456 Some(self.mtime()?)
456 Some(self.mtime()?)
457 } else {
457 } else {
458 None
458 None
459 };
459 };
460 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
460 let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
461 {
461 {
462 Some(self.flags().contains(Flags::FALLBACK_EXEC))
462 Some(self.flags().contains(Flags::FALLBACK_EXEC))
463 } else {
463 } else {
464 None
464 None
465 };
465 };
466 let fallback_symlink =
466 let fallback_symlink =
467 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
467 if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
468 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
468 Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
469 } else {
469 } else {
470 None
470 None
471 };
471 };
472 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
472 Ok(DirstateEntry::from_v2_data(DirstateV2Data {
473 wc_tracked,
473 wc_tracked,
474 p1_tracked,
474 p1_tracked,
475 p2_info,
475 p2_info,
476 mode_size,
476 mode_size,
477 mtime,
477 mtime,
478 fallback_exec,
478 fallback_exec,
479 fallback_symlink,
479 fallback_symlink,
480 }))
480 }))
481 }
481 }
482
482
483 pub(super) fn entry(
483 pub(super) fn entry(
484 &self,
484 &self,
485 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
485 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
486 if self.has_entry() {
486 if self.has_entry() {
487 Ok(Some(self.assume_entry()?))
487 Ok(Some(self.assume_entry()?))
488 } else {
488 } else {
489 Ok(None)
489 Ok(None)
490 }
490 }
491 }
491 }
492
492
493 pub(super) fn children<'on_disk>(
493 pub(super) fn children<'on_disk>(
494 &self,
494 &self,
495 on_disk: &'on_disk [u8],
495 on_disk: &'on_disk [u8],
496 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
496 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
497 read_nodes(on_disk, self.children)
497 read_nodes(on_disk, self.children)
498 }
498 }
499
499
500 pub(super) fn to_in_memory_node<'on_disk>(
500 pub(super) fn to_in_memory_node<'on_disk>(
501 &self,
501 &self,
502 on_disk: &'on_disk [u8],
502 on_disk: &'on_disk [u8],
503 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
503 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
504 Ok(dirstate_map::Node {
504 Ok(dirstate_map::Node {
505 children: dirstate_map::ChildNodes::OnDisk(
505 children: dirstate_map::ChildNodes::OnDisk(
506 self.children(on_disk)?,
506 self.children(on_disk)?,
507 ),
507 ),
508 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
508 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
509 data: self.node_data()?,
509 data: self.node_data()?,
510 descendants_with_entry_count: self
510 descendants_with_entry_count: self
511 .descendants_with_entry_count
511 .descendants_with_entry_count
512 .get(),
512 .get(),
513 tracked_descendants_count: self.tracked_descendants_count.get(),
513 tracked_descendants_count: self.tracked_descendants_count.get(),
514 })
514 })
515 }
515 }
516
516
517 fn from_dirstate_entry(
517 fn from_dirstate_entry(
518 entry: &DirstateEntry,
518 entry: &DirstateEntry,
519 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
519 ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
520 let DirstateV2Data {
520 let DirstateV2Data {
521 wc_tracked,
521 wc_tracked,
522 p1_tracked,
522 p1_tracked,
523 p2_info,
523 p2_info,
524 mode_size: mode_size_opt,
524 mode_size: mode_size_opt,
525 mtime: mtime_opt,
525 mtime: mtime_opt,
526 fallback_exec,
526 fallback_exec,
527 fallback_symlink,
527 fallback_symlink,
528 } = entry.v2_data();
528 } = entry.v2_data();
529 // TODO: convert through raw flag bits instead?
529 // TODO: convert through raw flag bits instead?
530 let mut flags = Flags::empty();
530 let mut flags = Flags::empty();
531 flags.set(Flags::WDIR_TRACKED, wc_tracked);
531 flags.set(Flags::WDIR_TRACKED, wc_tracked);
532 flags.set(Flags::P1_TRACKED, p1_tracked);
532 flags.set(Flags::P1_TRACKED, p1_tracked);
533 flags.set(Flags::P2_INFO, p2_info);
533 flags.set(Flags::P2_INFO, p2_info);
534 // Some platforms' libc don't have the same type (MacOS uses i32 here)
534 // Some platforms' libc don't have the same type (MacOS uses i32 here)
535 #[allow(clippy::unnecessary_cast)]
535 #[allow(clippy::unnecessary_cast)]
536 let size = if let Some((m, s)) = mode_size_opt {
536 let size = if let Some((m, s)) = mode_size_opt {
537 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
537 let exec_perm = m & (libc::S_IXUSR as u32) != 0;
538 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
538 let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
539 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
539 flags.set(Flags::MODE_EXEC_PERM, exec_perm);
540 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
540 flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
541 flags.insert(Flags::HAS_MODE_AND_SIZE);
541 flags.insert(Flags::HAS_MODE_AND_SIZE);
542 s.into()
542 s.into()
543 } else {
543 } else {
544 0.into()
544 0.into()
545 };
545 };
546 let mtime = if let Some(m) = mtime_opt {
546 let mtime = if let Some(m) = mtime_opt {
547 flags.insert(Flags::HAS_MTIME);
547 flags.insert(Flags::HAS_MTIME);
548 if m.second_ambiguous {
548 if m.second_ambiguous {
549 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
549 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
550 };
550 };
551 m.into()
551 m.into()
552 } else {
552 } else {
553 PackedTruncatedTimestamp::null()
553 PackedTruncatedTimestamp::null()
554 };
554 };
555 if let Some(f_exec) = fallback_exec {
555 if let Some(f_exec) = fallback_exec {
556 flags.insert(Flags::HAS_FALLBACK_EXEC);
556 flags.insert(Flags::HAS_FALLBACK_EXEC);
557 if f_exec {
557 if f_exec {
558 flags.insert(Flags::FALLBACK_EXEC);
558 flags.insert(Flags::FALLBACK_EXEC);
559 }
559 }
560 }
560 }
561 if let Some(f_symlink) = fallback_symlink {
561 if let Some(f_symlink) = fallback_symlink {
562 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
562 flags.insert(Flags::HAS_FALLBACK_SYMLINK);
563 if f_symlink {
563 if f_symlink {
564 flags.insert(Flags::FALLBACK_SYMLINK);
564 flags.insert(Flags::FALLBACK_SYMLINK);
565 }
565 }
566 }
566 }
567 (flags, size, mtime)
567 (flags, size, mtime)
568 }
568 }
569 }
569 }
570
570
571 fn read_hg_path(
571 fn read_hg_path(
572 on_disk: &[u8],
572 on_disk: &[u8],
573 slice: PathSlice,
573 slice: PathSlice,
574 ) -> Result<&HgPath, DirstateV2ParseError> {
574 ) -> Result<&HgPath, DirstateV2ParseError> {
575 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
575 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
576 }
576 }
577
577
578 fn read_nodes(
578 fn read_nodes(
579 on_disk: &[u8],
579 on_disk: &[u8],
580 slice: ChildNodes,
580 slice: ChildNodes,
581 ) -> Result<&[Node], DirstateV2ParseError> {
581 ) -> Result<&[Node], DirstateV2ParseError> {
582 read_slice(on_disk, slice.start, slice.len.get())
582 read_slice(on_disk, slice.start, slice.len.get())
583 }
583 }
584
584
585 fn read_slice<T, Len>(
585 fn read_slice<T, Len>(
586 on_disk: &[u8],
586 on_disk: &[u8],
587 start: Offset,
587 start: Offset,
588 len: Len,
588 len: Len,
589 ) -> Result<&[T], DirstateV2ParseError>
589 ) -> Result<&[T], DirstateV2ParseError>
590 where
590 where
591 T: BytesCast,
591 T: BytesCast,
592 Len: TryInto<usize>,
592 Len: TryInto<usize>,
593 {
593 {
594 // Either `usize::MAX` would result in "out of bounds" error since a single
594 // Either `usize::MAX` would result in "out of bounds" error since a single
595 // `&[u8]` cannot occupy the entire addess space.
595 // `&[u8]` cannot occupy the entire addess space.
596 let start = start.get().try_into().unwrap_or(std::usize::MAX);
596 let start = start.get().try_into().unwrap_or(std::usize::MAX);
597 let len = len.try_into().unwrap_or(std::usize::MAX);
597 let len = len.try_into().unwrap_or(std::usize::MAX);
598 let bytes = match on_disk.get(start..) {
598 let bytes = match on_disk.get(start..) {
599 Some(bytes) => bytes,
599 Some(bytes) => bytes,
600 None => {
600 None => {
601 return Err(DirstateV2ParseError::new(
601 return Err(DirstateV2ParseError::new(
602 "not enough bytes from disk",
602 "not enough bytes from disk",
603 ))
603 ))
604 }
604 }
605 };
605 };
606 T::slice_from_bytes(bytes, len)
606 T::slice_from_bytes(bytes, len)
607 .map_err(|e| {
607 .map_err(|e| {
608 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
608 DirstateV2ParseError::new(format!("when reading a slice, {}", e))
609 })
609 })
610 .map(|(slice, _rest)| slice)
610 .map(|(slice, _rest)| slice)
611 }
611 }
612
612
613 /// Returns new data and metadata, together with whether that data should be
613 /// Returns new data and metadata, together with whether that data should be
614 /// appended to the existing data file whose content is at
614 /// appended to the existing data file whose content is at
615 /// `dirstate_map.on_disk` (true), instead of written to a new data file
615 /// `dirstate_map.on_disk` (true), instead of written to a new data file
616 /// (false), and the previous size of data on disk.
616 /// (false), and the previous size of data on disk.
617 pub(super) fn write(
617 pub(super) fn write(
618 dirstate_map: &DirstateMap,
618 dirstate_map: &DirstateMap,
619 write_mode: DirstateMapWriteMode,
619 write_mode: DirstateMapWriteMode,
620 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
620 ) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
621 let append = match write_mode {
621 let append = match write_mode {
622 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
622 DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
623 DirstateMapWriteMode::ForceNewDataFile => false,
623 DirstateMapWriteMode::ForceNewDataFile => false,
624 DirstateMapWriteMode::ForceAppend => true,
624 DirstateMapWriteMode::ForceAppend => true,
625 };
625 };
626 if append {
626 if append {
627 log::trace!("appending to the dirstate data file");
627 log::trace!("appending to the dirstate data file");
628 } else {
628 } else {
629 log::trace!("creating new dirstate data file");
629 log::trace!("creating new dirstate data file");
630 }
630 }
631
631
632 // This ignores the space for paths, and for nodes without an entry.
632 // This ignores the space for paths, and for nodes without an entry.
633 // TODO: better estimate? Skip the `Vec` and write to a file directly?
633 // TODO: better estimate? Skip the `Vec` and write to a file directly?
634 let size_guess = std::mem::size_of::<Node>()
634 let size_guess = std::mem::size_of::<Node>()
635 * dirstate_map.nodes_with_entry_count as usize;
635 * dirstate_map.nodes_with_entry_count as usize;
636
636
637 let mut writer = Writer {
637 let mut writer = Writer {
638 dirstate_map,
638 dirstate_map,
639 append,
639 append,
640 out: Vec::with_capacity(size_guess),
640 out: Vec::with_capacity(size_guess),
641 };
641 };
642
642
643 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
643 let root_nodes = dirstate_map.root.as_ref();
644 for node in root_nodes.iter() {
645 // Catch some corruptions before we write to disk
646 let full_path = node.full_path(dirstate_map.on_disk)?;
647 let base_name = node.base_name(dirstate_map.on_disk)?;
648 if full_path != base_name {
649 let explanation = format!(
650 "Dirstate root node '{}' is not at the root",
651 full_path
652 );
653 return Err(HgError::corrupted(explanation).into());
654 }
655 }
656 let root_nodes = writer.write_nodes(root_nodes)?;
644
657
645 let unreachable_bytes = if append {
658 let unreachable_bytes = if append {
646 dirstate_map.unreachable_bytes
659 dirstate_map.unreachable_bytes
647 } else {
660 } else {
648 0
661 0
649 };
662 };
650 let meta = TreeMetadata {
663 let meta = TreeMetadata {
651 root_nodes,
664 root_nodes,
652 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
665 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
653 nodes_with_copy_source_count: dirstate_map
666 nodes_with_copy_source_count: dirstate_map
654 .nodes_with_copy_source_count
667 .nodes_with_copy_source_count
655 .into(),
668 .into(),
656 unreachable_bytes: unreachable_bytes.into(),
669 unreachable_bytes: unreachable_bytes.into(),
657 unused: [0; 4],
670 unused: [0; 4],
658 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
671 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
659 };
672 };
660 Ok((writer.out, meta, append, dirstate_map.old_data_size))
673 Ok((writer.out, meta, append, dirstate_map.old_data_size))
661 }
674 }
662
675
663 struct Writer<'dmap, 'on_disk> {
676 struct Writer<'dmap, 'on_disk> {
664 dirstate_map: &'dmap DirstateMap<'on_disk>,
677 dirstate_map: &'dmap DirstateMap<'on_disk>,
665 append: bool,
678 append: bool,
666 out: Vec<u8>,
679 out: Vec<u8>,
667 }
680 }
668
681
669 impl Writer<'_, '_> {
682 impl Writer<'_, '_> {
670 fn write_nodes(
683 fn write_nodes(
671 &mut self,
684 &mut self,
672 nodes: dirstate_map::ChildNodesRef,
685 nodes: dirstate_map::ChildNodesRef,
673 ) -> Result<ChildNodes, DirstateError> {
686 ) -> Result<ChildNodes, DirstateError> {
674 // Reuse already-written nodes if possible
687 // Reuse already-written nodes if possible
675 if self.append {
688 if self.append {
676 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
689 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
677 let start = self.on_disk_offset_of(nodes_slice).expect(
690 let start = self.on_disk_offset_of(nodes_slice).expect(
678 "dirstate-v2 OnDisk nodes not found within on_disk",
691 "dirstate-v2 OnDisk nodes not found within on_disk",
679 );
692 );
680 let len = child_nodes_len_from_usize(nodes_slice.len());
693 let len = child_nodes_len_from_usize(nodes_slice.len());
681 return Ok(ChildNodes { start, len });
694 return Ok(ChildNodes { start, len });
682 }
695 }
683 }
696 }
684
697
685 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
698 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
686 // undefined iteration order. Sort to enable binary search in the
699 // undefined iteration order. Sort to enable binary search in the
687 // written file.
700 // written file.
688 let nodes = nodes.sorted();
701 let nodes = nodes.sorted();
689 let nodes_len = nodes.len();
702 let nodes_len = nodes.len();
690
703
691 // First accumulate serialized nodes in a `Vec`
704 // First accumulate serialized nodes in a `Vec`
692 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
705 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
693 for node in nodes {
706 for node in nodes {
694 let children = node.children(self.dirstate_map.on_disk)?;
707 let children = node.children(self.dirstate_map.on_disk)?;
695 let full_path = node.full_path(self.dirstate_map.on_disk)?;
708 let full_path = node.full_path(self.dirstate_map.on_disk)?;
696 self.check_children(&children, full_path)?;
709 self.check_children(&children, full_path)?;
697
710
698 let children = self.write_nodes(children)?;
711 let children = self.write_nodes(children)?;
699 let full_path = self.write_path(full_path.as_bytes());
712 let full_path = self.write_path(full_path.as_bytes());
700 let copy_source = if let Some(source) =
713 let copy_source = if let Some(source) =
701 node.copy_source(self.dirstate_map.on_disk)?
714 node.copy_source(self.dirstate_map.on_disk)?
702 {
715 {
703 self.write_path(source.as_bytes())
716 self.write_path(source.as_bytes())
704 } else {
717 } else {
705 PathSlice {
718 PathSlice {
706 start: 0.into(),
719 start: 0.into(),
707 len: 0.into(),
720 len: 0.into(),
708 }
721 }
709 };
722 };
710 on_disk_nodes.push(match node {
723 on_disk_nodes.push(match node {
711 NodeRef::InMemory(path, node) => {
724 NodeRef::InMemory(path, node) => {
712 let (flags, size, mtime) = match &node.data {
725 let (flags, size, mtime) = match &node.data {
713 dirstate_map::NodeData::Entry(entry) => {
726 dirstate_map::NodeData::Entry(entry) => {
714 Node::from_dirstate_entry(entry)
727 Node::from_dirstate_entry(entry)
715 }
728 }
716 dirstate_map::NodeData::CachedDirectory { mtime } => {
729 dirstate_map::NodeData::CachedDirectory { mtime } => {
717 // we currently never set a mtime if unknown file
730 // we currently never set a mtime if unknown file
718 // are present.
731 // are present.
719 // So if we have a mtime for a directory, we know
732 // So if we have a mtime for a directory, we know
720 // they are no unknown
733 // they are no unknown
721 // files and we
734 // files and we
722 // blindly set ALL_UNKNOWN_RECORDED.
735 // blindly set ALL_UNKNOWN_RECORDED.
723 //
736 //
724 // We never set ALL_IGNORED_RECORDED since we
737 // We never set ALL_IGNORED_RECORDED since we
725 // don't track that case
738 // don't track that case
726 // currently.
739 // currently.
727 let mut flags = Flags::DIRECTORY
740 let mut flags = Flags::DIRECTORY
728 | Flags::HAS_MTIME
741 | Flags::HAS_MTIME
729 | Flags::ALL_UNKNOWN_RECORDED;
742 | Flags::ALL_UNKNOWN_RECORDED;
730 if mtime.second_ambiguous {
743 if mtime.second_ambiguous {
731 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
744 flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
732 }
745 }
733 (flags, 0.into(), (*mtime).into())
746 (flags, 0.into(), (*mtime).into())
734 }
747 }
735 dirstate_map::NodeData::None => (
748 dirstate_map::NodeData::None => (
736 Flags::DIRECTORY,
749 Flags::DIRECTORY,
737 0.into(),
750 0.into(),
738 PackedTruncatedTimestamp::null(),
751 PackedTruncatedTimestamp::null(),
739 ),
752 ),
740 };
753 };
741 Node {
754 Node {
742 children,
755 children,
743 copy_source,
756 copy_source,
744 full_path,
757 full_path,
745 base_name_start: u16::try_from(path.base_name_start())
758 base_name_start: u16::try_from(path.base_name_start())
746 // Could only panic for paths over 64 KiB
759 // Could only panic for paths over 64 KiB
747 .expect("dirstate-v2 path length overflow")
760 .expect("dirstate-v2 path length overflow")
748 .into(),
761 .into(),
749 descendants_with_entry_count: node
762 descendants_with_entry_count: node
750 .descendants_with_entry_count
763 .descendants_with_entry_count
751 .into(),
764 .into(),
752 tracked_descendants_count: node
765 tracked_descendants_count: node
753 .tracked_descendants_count
766 .tracked_descendants_count
754 .into(),
767 .into(),
755 flags: flags.bits().into(),
768 flags: flags.bits().into(),
756 size,
769 size,
757 mtime,
770 mtime,
758 }
771 }
759 }
772 }
760 NodeRef::OnDisk(node) => Node {
773 NodeRef::OnDisk(node) => Node {
761 children,
774 children,
762 copy_source,
775 copy_source,
763 full_path,
776 full_path,
764 ..*node
777 ..*node
765 },
778 },
766 })
779 })
767 }
780 }
768 // … so we can write them contiguously, after writing everything else
781 // … so we can write them contiguously, after writing everything else
769 // they refer to.
782 // they refer to.
770 let start = self.current_offset();
783 let start = self.current_offset();
771 let len = child_nodes_len_from_usize(nodes_len);
784 let len = child_nodes_len_from_usize(nodes_len);
772 self.out.extend(on_disk_nodes.as_bytes());
785 self.out.extend(on_disk_nodes.as_bytes());
773 Ok(ChildNodes { start, len })
786 Ok(ChildNodes { start, len })
774 }
787 }
775
788
776 /// Catch some dirstate corruptions before writing them to disk
789 /// Catch some dirstate corruptions before writing them to disk
777 fn check_children(
790 fn check_children(
778 &mut self,
791 &mut self,
779 children: &dirstate_map::ChildNodesRef,
792 children: &dirstate_map::ChildNodesRef,
780 full_path: &HgPath,
793 full_path: &HgPath,
781 ) -> Result<(), DirstateError> {
794 ) -> Result<(), DirstateError> {
782 for child in children.iter() {
795 for child in children.iter() {
783 let child_full_path =
796 let child_full_path =
784 child.full_path(self.dirstate_map.on_disk)?;
797 child.full_path(self.dirstate_map.on_disk)?;
785
798
786 let prefix_length = child_full_path.len()
799 let prefix_length = child_full_path.len()
787 // remove the filename
800 // remove the filename
788 - child.base_name(self.dirstate_map.on_disk)?.len()
801 - child.base_name(self.dirstate_map.on_disk)?.len()
789 // remove the slash
802 // remove the slash
790 - 1;
803 - 1;
791
804
792 let child_prefix = &child_full_path.as_bytes()[..prefix_length];
805 let child_prefix = &child_full_path.as_bytes()[..prefix_length];
793
806
794 if child_prefix != full_path.as_bytes() {
807 if child_prefix != full_path.as_bytes() {
795 let explanation = format!(
808 let explanation = format!(
796 "dirstate child node's path '{}' \
809 "dirstate child node's path '{}' \
797 does not start with its parent's path '{}'",
810 does not start with its parent's path '{}'",
798 child_full_path, full_path,
811 child_full_path, full_path,
799 );
812 );
800
813
801 return Err(HgError::corrupted(explanation).into());
814 return Err(HgError::corrupted(explanation).into());
802 }
815 }
803 }
816 }
804 Ok(())
817 Ok(())
805 }
818 }
806
819
807 /// If the given slice of items is within `on_disk`, returns its offset
820 /// If the given slice of items is within `on_disk`, returns its offset
808 /// from the start of `on_disk`.
821 /// from the start of `on_disk`.
809 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
822 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
810 where
823 where
811 T: BytesCast,
824 T: BytesCast,
812 {
825 {
813 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
826 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
814 let start = slice.as_ptr() as usize;
827 let start = slice.as_ptr() as usize;
815 let end = start + slice.len();
828 let end = start + slice.len();
816 start..=end
829 start..=end
817 }
830 }
818 let slice_addresses = address_range(slice.as_bytes());
831 let slice_addresses = address_range(slice.as_bytes());
819 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
832 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
820 if on_disk_addresses.contains(slice_addresses.start())
833 if on_disk_addresses.contains(slice_addresses.start())
821 && on_disk_addresses.contains(slice_addresses.end())
834 && on_disk_addresses.contains(slice_addresses.end())
822 {
835 {
823 let offset = slice_addresses.start() - on_disk_addresses.start();
836 let offset = slice_addresses.start() - on_disk_addresses.start();
824 Some(offset_from_usize(offset))
837 Some(offset_from_usize(offset))
825 } else {
838 } else {
826 None
839 None
827 }
840 }
828 }
841 }
829
842
830 fn current_offset(&mut self) -> Offset {
843 fn current_offset(&mut self) -> Offset {
831 let mut offset = self.out.len();
844 let mut offset = self.out.len();
832 if self.append {
845 if self.append {
833 offset += self.dirstate_map.on_disk.len()
846 offset += self.dirstate_map.on_disk.len()
834 }
847 }
835 offset_from_usize(offset)
848 offset_from_usize(offset)
836 }
849 }
837
850
838 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
851 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
839 let len = path_len_from_usize(slice.len());
852 let len = path_len_from_usize(slice.len());
840 // Reuse an already-written path if possible
853 // Reuse an already-written path if possible
841 if self.append {
854 if self.append {
842 if let Some(start) = self.on_disk_offset_of(slice) {
855 if let Some(start) = self.on_disk_offset_of(slice) {
843 return PathSlice { start, len };
856 return PathSlice { start, len };
844 }
857 }
845 }
858 }
846 let start = self.current_offset();
859 let start = self.current_offset();
847 self.out.extend(slice.as_bytes());
860 self.out.extend(slice.as_bytes());
848 PathSlice { start, len }
861 PathSlice { start, len }
849 }
862 }
850 }
863 }
851
864
852 fn offset_from_usize(x: usize) -> Offset {
865 fn offset_from_usize(x: usize) -> Offset {
853 u32::try_from(x)
866 u32::try_from(x)
854 // Could only panic for a dirstate file larger than 4 GiB
867 // Could only panic for a dirstate file larger than 4 GiB
855 .expect("dirstate-v2 offset overflow")
868 .expect("dirstate-v2 offset overflow")
856 .into()
869 .into()
857 }
870 }
858
871
859 fn child_nodes_len_from_usize(x: usize) -> Size {
872 fn child_nodes_len_from_usize(x: usize) -> Size {
860 u32::try_from(x)
873 u32::try_from(x)
861 // Could only panic with over 4 billion nodes
874 // Could only panic with over 4 billion nodes
862 .expect("dirstate-v2 slice length overflow")
875 .expect("dirstate-v2 slice length overflow")
863 .into()
876 .into()
864 }
877 }
865
878
866 fn path_len_from_usize(x: usize) -> PathSize {
879 fn path_len_from_usize(x: usize) -> PathSize {
867 u16::try_from(x)
880 u16::try_from(x)
868 // Could only panic for paths over 64 KiB
881 // Could only panic for paths over 64 KiB
869 .expect("dirstate-v2 path length overflow")
882 .expect("dirstate-v2 path length overflow")
870 .into()
883 .into()
871 }
884 }
872
885
873 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
886 impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
874 fn from(timestamp: TruncatedTimestamp) -> Self {
887 fn from(timestamp: TruncatedTimestamp) -> Self {
875 Self {
888 Self {
876 truncated_seconds: timestamp.truncated_seconds().into(),
889 truncated_seconds: timestamp.truncated_seconds().into(),
877 nanoseconds: timestamp.nanoseconds().into(),
890 nanoseconds: timestamp.nanoseconds().into(),
878 }
891 }
879 }
892 }
880 }
893 }
881
894
882 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
895 impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
883 type Error = DirstateV2ParseError;
896 type Error = DirstateV2ParseError;
884
897
885 fn try_from(
898 fn try_from(
886 timestamp: PackedTruncatedTimestamp,
899 timestamp: PackedTruncatedTimestamp,
887 ) -> Result<Self, Self::Error> {
900 ) -> Result<Self, Self::Error> {
888 Self::from_already_truncated(
901 Self::from_already_truncated(
889 timestamp.truncated_seconds.get(),
902 timestamp.truncated_seconds.get(),
890 timestamp.nanoseconds.get(),
903 timestamp.nanoseconds.get(),
891 false,
904 false,
892 )
905 )
893 }
906 }
894 }
907 }
895 impl PackedTruncatedTimestamp {
908 impl PackedTruncatedTimestamp {
896 fn null() -> Self {
909 fn null() -> Self {
897 Self {
910 Self {
898 truncated_seconds: 0.into(),
911 truncated_seconds: 0.into(),
899 nanoseconds: 0.into(),
912 nanoseconds: 0.into(),
900 }
913 }
901 }
914 }
902 }
915 }
General Comments 0
You need to be logged in to leave comments. Login now