##// END OF EJS Templates
dirstate-v2: Move fixed-size tree metadata into the docket file...
Simon Sapin -
r48482:78f7f0d4 default
parent child Browse files
Show More
@@ -999,11 +999,7 b' def debugdirstateignorepatternshash(ui, '
999 999 if repo.dirstate._use_dirstate_v2:
1000 1000 docket = repo.dirstate._map.docket
1001 1001 hash_len = 20 # 160 bits for SHA-1
1002 hash_offset = docket.data_size - hash_len # hash is at the end
1003 data_filename = docket.data_filename()
1004 with repo.vfs(data_filename) as f:
1005 f.seek(hash_offset)
1006 hash_bytes = f.read(hash_len)
1002 hash_bytes = docket.tree_metadata[-hash_len:]
1007 1003 ui.write(binascii.hexlify(hash_bytes) + b'\n')
1008 1004
1009 1005
@@ -638,7 +638,7 b' if rustmod is not None:'
638 638 else:
639 639 data = b''
640 640 self._rustmap = rustmod.DirstateMap.new_v2(
641 data, self.docket.data_size
641 data, self.docket.data_size, self.docket.tree_metadata
642 642 )
643 643 parents = self.docket.parents
644 644 else:
@@ -665,7 +665,7 b' if rustmod is not None:'
665 665
666 666 # We can only append to an existing data file if there is one
667 667 can_append = self.docket.uuid is not None
668 packed, append = self._rustmap.write_v2(now, can_append)
668 packed, meta, append = self._rustmap.write_v2(now, can_append)
669 669 if append:
670 670 docket = self.docket
671 671 data_filename = docket.data_filename()
@@ -679,12 +679,13 b' if rustmod is not None:'
679 679 assert written == len(packed), (written, len(packed))
680 680 docket.data_size += len(packed)
681 681 docket.parents = self.parents()
682 docket.tree_metadata = meta
682 683 st.write(docket.serialize())
683 684 st.close()
684 685 else:
685 686 old_docket = self.docket
686 687 new_docket = docketmod.DirstateDocket.with_new_uuid(
687 self.parents(), len(packed)
688 self.parents(), len(packed), meta
688 689 )
689 690 data_filename = new_docket.data_filename()
690 691 if tr:
@@ -14,47 +14,60 b' from ..revlogutils import docket as dock'
14 14
15 15 V2_FORMAT_MARKER = b"dirstate-v2\n"
16 16
17 # Must match the constant of the same name in
18 # `rust/hg-core/src/dirstate_tree/on_disk.rs`
19 TREE_METADATA_SIZE = 40
20
17 21 # * 12 bytes: format marker
18 22 # * 32 bytes: node ID of the working directory's first parent
19 23 # * 32 bytes: node ID of the working directory's second parent
20 24 # * 4 bytes: big-endian used size of the data file
25 # * {TREE_METADATA_SIZE} bytes: tree metadata, parsed separately
21 26 # * 1 byte: length of the data file's UUID
22 27 # * variable: data file's UUID
23 28 #
24 29 # Node IDs are null-padded if shorter than 32 bytes.
25 30 # A data file shorter than the specified used size is corrupted (truncated)
26 HEADER = struct.Struct(">{}s32s32sLB".format(len(V2_FORMAT_MARKER)))
31 HEADER = struct.Struct(
32 ">{}s32s32sL{}sB".format(len(V2_FORMAT_MARKER), TREE_METADATA_SIZE)
33 )
27 34
28 35
29 36 class DirstateDocket(object):
30 37 data_filename_pattern = b'dirstate.%s.d'
31 38
32 def __init__(self, parents, data_size, uuid):
39 def __init__(self, parents, data_size, tree_metadata, uuid):
33 40 self.parents = parents
34 41 self.data_size = data_size
42 self.tree_metadata = tree_metadata
35 43 self.uuid = uuid
36 44
37 45 @classmethod
38 def with_new_uuid(cls, parents, data):
39 return cls(parents, data, docket_mod.make_uid())
46 def with_new_uuid(cls, parents, data_size, tree_metadata):
47 return cls(parents, data_size, tree_metadata, docket_mod.make_uid())
40 48
41 49 @classmethod
42 50 def parse(cls, data, nodeconstants):
43 51 if not data:
44 52 parents = (nodeconstants.nullid, nodeconstants.nullid)
45 return cls(parents, 0, None)
46 marker, p1, p2, data_size, uuid_size = HEADER.unpack_from(data)
53 return cls(parents, 0, b'', None)
54 marker, p1, p2, data_size, meta, uuid_size = HEADER.unpack_from(data)
47 55 if marker != V2_FORMAT_MARKER:
48 56 raise ValueError("expected dirstate-v2 marker")
49 57 uuid = data[HEADER.size : HEADER.size + uuid_size]
50 58 p1 = p1[: nodeconstants.nodelen]
51 59 p2 = p2[: nodeconstants.nodelen]
52 return cls((p1, p2), data_size, uuid)
60 return cls((p1, p2), data_size, meta, uuid)
53 61
54 62 def serialize(self):
55 63 p1, p2 = self.parents
56 64 header = HEADER.pack(
57 V2_FORMAT_MARKER, p1, p2, self.data_size, len(self.uuid)
65 V2_FORMAT_MARKER,
66 p1,
67 p2,
68 self.data_size,
69 self.tree_metadata,
70 len(self.uuid),
58 71 )
59 72 return header + self.uuid
60 73
@@ -424,9 +424,10 b" impl<'on_disk> DirstateMap<'on_disk> {"
424 424 pub fn new_v2(
425 425 on_disk: &'on_disk [u8],
426 426 data_size: usize,
427 metadata: &[u8],
427 428 ) -> Result<Self, DirstateError> {
428 429 if let Some(data) = on_disk.get(..data_size) {
429 Ok(on_disk::read(data)?)
430 Ok(on_disk::read(data, metadata)?)
430 431 } else {
431 432 Err(DirstateV2ParseError.into())
432 433 }
@@ -1094,15 +1095,16 b" impl<'on_disk> super::dispatch::Dirstate"
1094 1095 Ok(packed)
1095 1096 }
1096 1097
1097 /// Returns new data together with whether that data should be appended to
1098 /// the existing data file whose content is at `self.on_disk` (true),
1099 /// instead of written to a new data file (false).
1098 /// Returns new data and metadata together with whether that data should be
1099 /// appended to the existing data file whose content is at
1100 /// `self.on_disk` (true), instead of written to a new data file
1101 /// (false).
1100 1102 #[timed]
1101 1103 fn pack_v2(
1102 1104 &mut self,
1103 1105 now: Timestamp,
1104 1106 can_append: bool,
1105 ) -> Result<(Vec<u8>, bool), DirstateError> {
1107 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
1106 1108 // TODO: how do we want to handle this in 2038?
1107 1109 let now: i32 = now.0.try_into().expect("time overflow");
1108 1110 let mut paths = Vec::new();
@@ -182,16 +182,17 b' pub trait DirstateMapMethods {'
182 182 /// serialize bytes to write a dirstate data file to disk in dirstate-v2
183 183 /// format.
184 184 ///
185 /// Returns new data together with whether that data should be appended to
186 /// the existing data file whose content is at `self.on_disk` (true),
187 /// instead of written to a new data file (false).
185 /// Returns new data and metadata together with whether that data should be
186 /// appended to the existing data file whose content is at
187 /// `self.on_disk` (true), instead of written to a new data file
188 /// (false).
188 189 ///
189 190 /// Note: this is only supported by the tree dirstate map.
190 191 fn pack_v2(
191 192 &mut self,
192 193 now: Timestamp,
193 194 can_append: bool,
194 ) -> Result<(Vec<u8>, bool), DirstateError>;
195 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError>;
195 196
196 197 /// Run the status algorithm.
197 198 ///
@@ -395,7 +396,7 b' impl DirstateMapMethods for DirstateMap '
395 396 &mut self,
396 397 _now: Timestamp,
397 398 _can_append: bool,
398 ) -> Result<(Vec<u8>, bool), DirstateError> {
399 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
399 400 panic!(
400 401 "should have used dirstate_tree::DirstateMap to use the v2 format"
401 402 )
@@ -47,6 +47,18 b' const USED_NODE_ID_BYTES: usize = 20;'
47 47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49 49
50 /// Must match the constant of the same name in
51 /// `mercurial/dirstateutils/docket.py`
52 const TREE_METADATA_SIZE: usize = 40;
53
54 /// Make sure that size-affecting changes are made knowingly
55 #[allow(unused)]
56 fn static_assert_size_of() {
57 let _ = std::mem::transmute::<DocketHeader, [u8; 121]>;
58 let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
59 let _ = std::mem::transmute::<Node, [u8; 43]>;
60 }
61
50 62 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 63 #[derive(BytesCast)]
52 64 #[repr(C)]
@@ -58,6 +70,8 b' struct DocketHeader {'
58 70 /// Counted in bytes
59 71 data_size: Size,
60 72
73 metadata: TreeMetadata,
74
61 75 uuid_size: u8,
62 76 }
63 77
@@ -68,7 +82,7 b" pub struct Docket<'on_disk> {"
68 82
69 83 #[derive(BytesCast)]
70 84 #[repr(C)]
71 struct Root {
85 struct TreeMetadata {
72 86 root_nodes: ChildNodes,
73 87 nodes_with_entry_count: Size,
74 88 nodes_with_copy_source_count: Size,
@@ -134,7 +148,7 b' pub(super) struct Node {'
134 148 /// - All direct children of this directory (as returned by
135 149 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
136 150 /// are ignored by ignore patterns whose hash is in
137 /// `Root::ignore_patterns_hash`.
151 /// `TreeMetadata::ignore_patterns_hash`.
138 152 ///
139 153 /// This means that if `std::fs::symlink_metadata` later reports the
140 154 /// same modification time and ignored patterns haven’t changed, a run
@@ -205,13 +219,6 b' struct PathSlice {'
205 219 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
206 220 type OptPathSlice = PathSlice;
207 221
208 /// Make sure that size-affecting changes are made knowingly
209 fn _static_assert_size_of() {
210 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
211 let _ = std::mem::transmute::<Root, [u8; 40]>;
212 let _ = std::mem::transmute::<Node, [u8; 43]>;
213 }
214
215 222 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
216 223 ///
217 224 /// This should only happen if Mercurial is buggy or a repository is corrupted.
@@ -242,6 +249,10 b" impl<'on_disk> Docket<'on_disk> {"
242 249 DirstateParents { p1, p2 }
243 250 }
244 251
252 pub fn tree_metadata(&self) -> &[u8] {
253 self.header.metadata.as_bytes()
254 }
255
245 256 pub fn data_size(&self) -> usize {
246 257 // This `unwrap` could only panic on a 16-bit CPU
247 258 self.header.data_size.get().try_into().unwrap()
@@ -265,40 +276,25 b' pub fn read_docket('
265 276 }
266 277 }
267 278
268 fn read_root<'on_disk>(
269 on_disk: &'on_disk [u8],
270 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
271 // Find the `Root` at the end of the given slice
272 let root_offset = on_disk
273 .len()
274 .checked_sub(std::mem::size_of::<Root>())
275 // A non-empty slice too short is an error
276 .ok_or(DirstateV2ParseError)?;
277 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
278 .map_err(|_| DirstateV2ParseError)?;
279 Ok(root)
280 }
281
282 279 pub(super) fn read<'on_disk>(
283 280 on_disk: &'on_disk [u8],
281 metadata: &[u8],
284 282 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
285 283 if on_disk.is_empty() {
286 284 return Ok(DirstateMap::empty(on_disk));
287 285 }
288 let root = read_root(on_disk)?;
289 let mut unreachable_bytes = root.unreachable_bytes.get();
290 // Each append writes a new `Root`, so it’s never reused
291 unreachable_bytes += std::mem::size_of::<Root>() as u32;
286 let (meta, _) = TreeMetadata::from_bytes(metadata)
287 .map_err(|_| DirstateV2ParseError)?;
292 288 let dirstate_map = DirstateMap {
293 289 on_disk,
294 290 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
295 291 on_disk,
296 root.root_nodes,
292 meta.root_nodes,
297 293 )?),
298 nodes_with_entry_count: root.nodes_with_entry_count.get(),
299 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
300 ignore_patterns_hash: root.ignore_patterns_hash,
301 unreachable_bytes,
294 nodes_with_entry_count: meta.nodes_with_entry_count.get(),
295 nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
296 ignore_patterns_hash: meta.ignore_patterns_hash,
297 unreachable_bytes: meta.unreachable_bytes.get(),
302 298 };
303 299 Ok(dirstate_map)
304 300 }
@@ -530,9 +526,11 b' where'
530 526
531 527 pub(crate) fn for_each_tracked_path<'on_disk>(
532 528 on_disk: &'on_disk [u8],
529 metadata: &[u8],
533 530 mut f: impl FnMut(&'on_disk HgPath),
534 531 ) -> Result<(), DirstateV2ParseError> {
535 let root = read_root(on_disk)?;
532 let (meta, _) = TreeMetadata::from_bytes(metadata)
533 .map_err(|_| DirstateV2ParseError)?;
536 534 fn recur<'on_disk>(
537 535 on_disk: &'on_disk [u8],
538 536 nodes: ChildNodes,
@@ -548,23 +546,23 b" pub(crate) fn for_each_tracked_path<'on_"
548 546 }
549 547 Ok(())
550 548 }
551 recur(on_disk, root.root_nodes, &mut f)
549 recur(on_disk, meta.root_nodes, &mut f)
552 550 }
553 551
554 /// Returns new data together with whether that data should be appended to the
555 /// existing data file whose content is at `dirstate_map.on_disk` (true),
556 /// instead of written to a new data file (false).
552 /// Returns new data and metadata, together with whether that data should be
553 /// appended to the existing data file whose content is at
554 /// `dirstate_map.on_disk` (true), instead of written to a new data file
555 /// (false).
557 556 pub(super) fn write(
558 557 dirstate_map: &mut DirstateMap,
559 558 can_append: bool,
560 ) -> Result<(Vec<u8>, bool), DirstateError> {
559 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
561 560 let append = can_append && dirstate_map.write_should_append();
562 561
563 562 // This ignores the space for paths, and for nodes without an entry.
564 563 // TODO: better estimate? Skip the `Vec` and write to a file directly?
565 let size_guess = std::mem::size_of::<Root>()
566 + std::mem::size_of::<Node>()
567 * dirstate_map.nodes_with_entry_count as usize;
564 let size_guess = std::mem::size_of::<Node>()
565 * dirstate_map.nodes_with_entry_count as usize;
568 566
569 567 let mut writer = Writer {
570 568 dirstate_map,
@@ -574,7 +572,7 b' pub(super) fn write('
574 572
575 573 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
576 574
577 let root = Root {
575 let meta = TreeMetadata {
578 576 root_nodes,
579 577 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
580 578 nodes_with_copy_source_count: dirstate_map
@@ -583,8 +581,7 b' pub(super) fn write('
583 581 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
584 582 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
585 583 };
586 writer.out.extend(root.as_bytes());
587 Ok((writer.out, append))
584 Ok((writer.out, meta.as_bytes().to_vec(), append))
588 585 }
589 586
590 587 struct Writer<'dmap, 'on_disk> {
@@ -22,27 +22,33 b' use rayon::prelude::*;'
22 22 pub struct Dirstate {
23 23 /// The `dirstate` content.
24 24 content: Vec<u8>,
25 dirstate_v2: bool,
25 v2_metadata: Option<Vec<u8>>,
26 26 }
27 27
28 28 impl Dirstate {
29 29 pub fn new(repo: &Repo) -> Result<Self, HgError> {
30 30 let mut content = repo.hg_vfs().read("dirstate")?;
31 if repo.has_dirstate_v2() {
31 let v2_metadata = if repo.has_dirstate_v2() {
32 32 let docket = read_docket(&content)?;
33 let meta = docket.tree_metadata().to_vec();
33 34 content = repo.hg_vfs().read(docket.data_filename())?;
34 }
35 Some(meta)
36 } else {
37 None
38 };
35 39 Ok(Self {
36 40 content,
37 dirstate_v2: repo.has_dirstate_v2(),
41 v2_metadata,
38 42 })
39 43 }
40 44
41 45 pub fn tracked_files(&self) -> Result<Vec<&HgPath>, DirstateError> {
42 46 let mut files = Vec::new();
43 47 if !self.content.is_empty() {
44 if self.dirstate_v2 {
45 for_each_tracked_path(&self.content, |path| files.push(path))?
48 if let Some(meta) = &self.v2_metadata {
49 for_each_tracked_path(&self.content, meta, |path| {
50 files.push(path)
51 })?
46 52 } else {
47 53 let _parents = parse_dirstate_entries(
48 54 &self.content,
@@ -84,12 +84,14 b' py_class!(pub class DirstateMap |py| {'
84 84 def new_v2(
85 85 on_disk: PyBytes,
86 86 data_size: usize,
87 tree_metadata: PyBytes,
87 88 ) -> PyResult<PyObject> {
88 89 let dirstate_error = |e: DirstateError| {
89 90 PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
90 91 };
91 let inner = OwningDirstateMap::new_v2(py, on_disk, data_size)
92 .map_err(dirstate_error)?;
92 let inner = OwningDirstateMap::new_v2(
93 py, on_disk, data_size, tree_metadata,
94 ).map_err(dirstate_error)?;
93 95 let map = Self::create_instance(py, Box::new(inner))?;
94 96 Ok(map.into_object())
95 97 }
@@ -353,9 +355,11 b' py_class!(pub class DirstateMap |py| {'
353 355 let mut inner = self.inner(py).borrow_mut();
354 356 let result = inner.pack_v2(now, can_append);
355 357 match result {
356 Ok((packed, append)) => {
358 Ok((packed, tree_metadata, append)) => {
357 359 let packed = PyBytes::new(py, &packed);
358 Ok((packed, append).to_py_object(py).into_object())
360 let tree_metadata = PyBytes::new(py, &tree_metadata);
361 let tuple = (packed, tree_metadata, append);
362 Ok(tuple.to_py_object(py).into_object())
359 363 },
360 364 Err(_) => Err(PyErr::new::<exc::OSError, _>(
361 365 py,
@@ -128,7 +128,7 b' impl DirstateMapMethods for OwningDirsta'
128 128 &mut self,
129 129 now: Timestamp,
130 130 can_append: bool,
131 ) -> Result<(Vec<u8>, bool), DirstateError> {
131 ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
132 132 self.get_mut().pack_v2(now, can_append)
133 133 }
134 134
@@ -49,9 +49,11 b' impl OwningDirstateMap {'
49 49 py: Python,
50 50 on_disk: PyBytes,
51 51 data_size: usize,
52 tree_metadata: PyBytes,
52 53 ) -> Result<Self, DirstateError> {
53 54 let bytes: &'_ [u8] = on_disk.data(py);
54 let map = DirstateMap::new_v2(bytes, data_size)?;
55 let map =
56 DirstateMap::new_v2(bytes, data_size, tree_metadata.data(py))?;
55 57
56 58 // Like in `bytes` above, this `'_` lifetime parameter borrows from
57 59 // the bytes buffer owned by `on_disk`.
@@ -168,13 +168,16 b' pub fn run(invocation: &crate::CliInvoca'
168 168 let repo = invocation.repo?;
169 169 let dirstate_data_mmap;
170 170 let (mut dmap, parents) = if repo.has_dirstate_v2() {
171 let docket_data =
172 repo.hg_vfs().read("dirstate").io_not_found_as_none()?;
171 173 let parents;
172 174 let dirstate_data;
173 175 let data_size;
174 if let Some(docket_data) =
175 repo.hg_vfs().read("dirstate").io_not_found_as_none()?
176 {
177 let docket = on_disk::read_docket(&docket_data)?;
176 let docket;
177 let tree_metadata;
178 if let Some(docket_data) = &docket_data {
179 docket = on_disk::read_docket(docket_data)?;
180 tree_metadata = docket.tree_metadata();
178 181 parents = Some(docket.parents());
179 182 data_size = docket.data_size();
180 183 dirstate_data_mmap = repo
@@ -184,10 +187,12 b' pub fn run(invocation: &crate::CliInvoca'
184 187 dirstate_data = dirstate_data_mmap.as_deref().unwrap_or(b"");
185 188 } else {
186 189 parents = None;
190 tree_metadata = b"";
187 191 data_size = 0;
188 192 dirstate_data = b"";
189 193 }
190 let dmap = DirstateMap::new_v2(dirstate_data, data_size)?;
194 let dmap =
195 DirstateMap::new_v2(dirstate_data, data_size, tree_metadata)?;
191 196 (dmap, parents)
192 197 } else {
193 198 dirstate_data_mmap =
General Comments 0
You need to be logged in to leave comments. Login now