##// END OF EJS Templates
dirstate-v2: Add heuristic for when to create a new data file...
Simon Sapin -
r48481:d9411836 default
parent child Browse files
Show More
@@ -1,1232 +1,1270 b''
1 1 use bytes_cast::BytesCast;
2 2 use micro_timer::timed;
3 3 use std::borrow::Cow;
4 4 use std::convert::TryInto;
5 5 use std::path::PathBuf;
6 6
7 7 use super::on_disk;
8 8 use super::on_disk::DirstateV2ParseError;
9 9 use super::path_with_basename::WithBasename;
10 10 use crate::dirstate::parsers::pack_entry;
11 11 use crate::dirstate::parsers::packed_entry_size;
12 12 use crate::dirstate::parsers::parse_dirstate_entries;
13 13 use crate::dirstate::parsers::Timestamp;
14 14 use crate::dirstate::MTIME_UNSET;
15 15 use crate::dirstate::SIZE_FROM_OTHER_PARENT;
16 16 use crate::dirstate::SIZE_NON_NORMAL;
17 17 use crate::dirstate::V1_RANGEMASK;
18 18 use crate::matchers::Matcher;
19 19 use crate::utils::hg_path::{HgPath, HgPathBuf};
20 20 use crate::CopyMapIter;
21 21 use crate::DirstateEntry;
22 22 use crate::DirstateError;
23 23 use crate::DirstateParents;
24 24 use crate::DirstateStatus;
25 25 use crate::EntryState;
26 26 use crate::FastHashMap;
27 27 use crate::PatternFileWarning;
28 28 use crate::StateMapIter;
29 29 use crate::StatusError;
30 30 use crate::StatusOptions;
31 31
/// Threshold for the append-vs-rewrite heuristic: keep appending to an
/// existing data file as long as the unreachable (no longer used) bytes
/// stay below this fraction of the total existing data.
const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5;
35
32 36 pub struct DirstateMap<'on_disk> {
33 37 /// Contents of the `.hg/dirstate` file
34 38 pub(super) on_disk: &'on_disk [u8],
35 39
36 40 pub(super) root: ChildNodes<'on_disk>,
37 41
38 42 /// Number of nodes anywhere in the tree that have `.entry.is_some()`.
39 43 pub(super) nodes_with_entry_count: u32,
40 44
41 45 /// Number of nodes anywhere in the tree that have
42 46 /// `.copy_source.is_some()`.
43 47 pub(super) nodes_with_copy_source_count: u32,
44 48
45 49 /// See on_disk::Header
46 50 pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash,
51
52 /// How many bytes of `on_disk` are not used anymore
53 pub(super) unreachable_bytes: u32,
47 54 }
48 55
49 56 /// Using a plain `HgPathBuf` of the full path from the repository root as a
50 57 /// map key would also work: all paths in a given map have the same parent
51 58 /// path, so comparing full paths gives the same result as comparing base
52 59 /// names. However `HashMap` would waste time always re-hashing the same
53 60 /// string prefix.
54 61 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
55 62
56 63 /// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
57 64 /// for on-disk nodes that don’t actually have a `Cow` to borrow.
58 65 pub(super) enum BorrowedPath<'tree, 'on_disk> {
59 66 InMemory(&'tree HgPathBuf),
60 67 OnDisk(&'on_disk HgPath),
61 68 }
62 69
63 70 pub(super) enum ChildNodes<'on_disk> {
64 71 InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
65 72 OnDisk(&'on_disk [on_disk::Node]),
66 73 }
67 74
68 75 pub(super) enum ChildNodesRef<'tree, 'on_disk> {
69 76 InMemory(&'tree FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
70 77 OnDisk(&'on_disk [on_disk::Node]),
71 78 }
72 79
73 80 pub(super) enum NodeRef<'tree, 'on_disk> {
74 81 InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>),
75 82 OnDisk(&'on_disk on_disk::Node),
76 83 }
77 84
78 85 impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
79 86 pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
80 87 match *self {
81 88 BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
82 89 BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
83 90 }
84 91 }
85 92 }
86 93
87 94 impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
88 95 type Target = HgPath;
89 96
90 97 fn deref(&self) -> &HgPath {
91 98 match *self {
92 99 BorrowedPath::InMemory(in_memory) => in_memory,
93 100 BorrowedPath::OnDisk(on_disk) => on_disk,
94 101 }
95 102 }
96 103 }
97 104
98 105 impl Default for ChildNodes<'_> {
99 106 fn default() -> Self {
100 107 ChildNodes::InMemory(Default::default())
101 108 }
102 109 }
103 110
104 111 impl<'on_disk> ChildNodes<'on_disk> {
105 112 pub(super) fn as_ref<'tree>(
106 113 &'tree self,
107 114 ) -> ChildNodesRef<'tree, 'on_disk> {
108 115 match self {
109 116 ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes),
110 117 ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes),
111 118 }
112 119 }
113 120
114 121 pub(super) fn is_empty(&self) -> bool {
115 122 match self {
116 123 ChildNodes::InMemory(nodes) => nodes.is_empty(),
117 124 ChildNodes::OnDisk(nodes) => nodes.is_empty(),
118 125 }
119 126 }
120 127
121 pub(super) fn make_mut(
128 fn make_mut(
122 129 &mut self,
123 130 on_disk: &'on_disk [u8],
131 unreachable_bytes: &mut u32,
124 132 ) -> Result<
125 133 &mut FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>,
126 134 DirstateV2ParseError,
127 135 > {
128 136 match self {
129 137 ChildNodes::InMemory(nodes) => Ok(nodes),
130 138 ChildNodes::OnDisk(nodes) => {
139 *unreachable_bytes +=
140 std::mem::size_of_val::<[on_disk::Node]>(nodes) as u32;
131 141 let nodes = nodes
132 142 .iter()
133 143 .map(|node| {
134 144 Ok((
135 145 node.path(on_disk)?,
136 146 node.to_in_memory_node(on_disk)?,
137 147 ))
138 148 })
139 149 .collect::<Result<_, _>>()?;
140 150 *self = ChildNodes::InMemory(nodes);
141 151 match self {
142 152 ChildNodes::InMemory(nodes) => Ok(nodes),
143 153 ChildNodes::OnDisk(_) => unreachable!(),
144 154 }
145 155 }
146 156 }
147 157 }
148 158 }
149 159
150 160 impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> {
151 161 pub(super) fn get(
152 162 &self,
153 163 base_name: &HgPath,
154 164 on_disk: &'on_disk [u8],
155 165 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
156 166 match self {
157 167 ChildNodesRef::InMemory(nodes) => Ok(nodes
158 168 .get_key_value(base_name)
159 169 .map(|(k, v)| NodeRef::InMemory(k, v))),
160 170 ChildNodesRef::OnDisk(nodes) => {
161 171 let mut parse_result = Ok(());
162 172 let search_result = nodes.binary_search_by(|node| {
163 173 match node.base_name(on_disk) {
164 174 Ok(node_base_name) => node_base_name.cmp(base_name),
165 175 Err(e) => {
166 176 parse_result = Err(e);
167 177 // Dummy comparison result, `search_result` won’t
168 178 // be used since `parse_result` is an error
169 179 std::cmp::Ordering::Equal
170 180 }
171 181 }
172 182 });
173 183 parse_result.map(|()| {
174 184 search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i]))
175 185 })
176 186 }
177 187 }
178 188 }
179 189
180 190 /// Iterate in undefined order
181 191 pub(super) fn iter(
182 192 &self,
183 193 ) -> impl Iterator<Item = NodeRef<'tree, 'on_disk>> {
184 194 match self {
185 195 ChildNodesRef::InMemory(nodes) => itertools::Either::Left(
186 196 nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)),
187 197 ),
188 198 ChildNodesRef::OnDisk(nodes) => {
189 199 itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk))
190 200 }
191 201 }
192 202 }
193 203
194 204 /// Iterate in parallel in undefined order
195 205 pub(super) fn par_iter(
196 206 &self,
197 207 ) -> impl rayon::iter::ParallelIterator<Item = NodeRef<'tree, 'on_disk>>
198 208 {
199 209 use rayon::prelude::*;
200 210 match self {
201 211 ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left(
202 212 nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)),
203 213 ),
204 214 ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right(
205 215 nodes.par_iter().map(NodeRef::OnDisk),
206 216 ),
207 217 }
208 218 }
209 219
210 220 pub(super) fn sorted(&self) -> Vec<NodeRef<'tree, 'on_disk>> {
211 221 match self {
212 222 ChildNodesRef::InMemory(nodes) => {
213 223 let mut vec: Vec<_> = nodes
214 224 .iter()
215 225 .map(|(k, v)| NodeRef::InMemory(k, v))
216 226 .collect();
217 227 fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath {
218 228 match node {
219 229 NodeRef::InMemory(path, _node) => path.base_name(),
220 230 NodeRef::OnDisk(_) => unreachable!(),
221 231 }
222 232 }
223 233 // `sort_unstable_by_key` doesn’t allow keys borrowing from the
224 234 // value: https://github.com/rust-lang/rust/issues/34162
225 235 vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b)));
226 236 vec
227 237 }
228 238 ChildNodesRef::OnDisk(nodes) => {
229 239 // Nodes on disk are already sorted
230 240 nodes.iter().map(NodeRef::OnDisk).collect()
231 241 }
232 242 }
233 243 }
234 244 }
235 245
236 246 impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> {
237 247 pub(super) fn full_path(
238 248 &self,
239 249 on_disk: &'on_disk [u8],
240 250 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
241 251 match self {
242 252 NodeRef::InMemory(path, _node) => Ok(path.full_path()),
243 253 NodeRef::OnDisk(node) => node.full_path(on_disk),
244 254 }
245 255 }
246 256
247 257 /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
248 258 /// HgPath>` detached from `'tree`
249 259 pub(super) fn full_path_borrowed(
250 260 &self,
251 261 on_disk: &'on_disk [u8],
252 262 ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
253 263 match self {
254 264 NodeRef::InMemory(path, _node) => match path.full_path() {
255 265 Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
256 266 Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
257 267 },
258 268 NodeRef::OnDisk(node) => {
259 269 Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
260 270 }
261 271 }
262 272 }
263 273
264 274 pub(super) fn base_name(
265 275 &self,
266 276 on_disk: &'on_disk [u8],
267 277 ) -> Result<&'tree HgPath, DirstateV2ParseError> {
268 278 match self {
269 279 NodeRef::InMemory(path, _node) => Ok(path.base_name()),
270 280 NodeRef::OnDisk(node) => node.base_name(on_disk),
271 281 }
272 282 }
273 283
274 284 pub(super) fn children(
275 285 &self,
276 286 on_disk: &'on_disk [u8],
277 287 ) -> Result<ChildNodesRef<'tree, 'on_disk>, DirstateV2ParseError> {
278 288 match self {
279 289 NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()),
280 290 NodeRef::OnDisk(node) => {
281 291 Ok(ChildNodesRef::OnDisk(node.children(on_disk)?))
282 292 }
283 293 }
284 294 }
285 295
286 296 pub(super) fn has_copy_source(&self) -> bool {
287 297 match self {
288 298 NodeRef::InMemory(_path, node) => node.copy_source.is_some(),
289 299 NodeRef::OnDisk(node) => node.has_copy_source(),
290 300 }
291 301 }
292 302
293 303 pub(super) fn copy_source(
294 304 &self,
295 305 on_disk: &'on_disk [u8],
296 306 ) -> Result<Option<&'tree HgPath>, DirstateV2ParseError> {
297 307 match self {
298 308 NodeRef::InMemory(_path, node) => {
299 309 Ok(node.copy_source.as_ref().map(|s| &**s))
300 310 }
301 311 NodeRef::OnDisk(node) => node.copy_source(on_disk),
302 312 }
303 313 }
304 314
305 315 pub(super) fn entry(
306 316 &self,
307 317 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
308 318 match self {
309 319 NodeRef::InMemory(_path, node) => {
310 320 Ok(node.data.as_entry().copied())
311 321 }
312 322 NodeRef::OnDisk(node) => node.entry(),
313 323 }
314 324 }
315 325
316 326 pub(super) fn state(
317 327 &self,
318 328 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
319 329 match self {
320 330 NodeRef::InMemory(_path, node) => {
321 331 Ok(node.data.as_entry().map(|entry| entry.state))
322 332 }
323 333 NodeRef::OnDisk(node) => node.state(),
324 334 }
325 335 }
326 336
327 337 pub(super) fn cached_directory_mtime(
328 338 &self,
329 339 ) -> Option<&'tree on_disk::Timestamp> {
330 340 match self {
331 341 NodeRef::InMemory(_path, node) => match &node.data {
332 342 NodeData::CachedDirectory { mtime } => Some(mtime),
333 343 _ => None,
334 344 },
335 345 NodeRef::OnDisk(node) => node.cached_directory_mtime(),
336 346 }
337 347 }
338 348
339 349 pub(super) fn descendants_with_entry_count(&self) -> u32 {
340 350 match self {
341 351 NodeRef::InMemory(_path, node) => {
342 352 node.descendants_with_entry_count
343 353 }
344 354 NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(),
345 355 }
346 356 }
347 357
348 358 pub(super) fn tracked_descendants_count(&self) -> u32 {
349 359 match self {
350 360 NodeRef::InMemory(_path, node) => node.tracked_descendants_count,
351 361 NodeRef::OnDisk(node) => node.tracked_descendants_count.get(),
352 362 }
353 363 }
354 364 }
355 365
356 366 /// Represents a file or a directory
357 367 #[derive(Default)]
358 368 pub(super) struct Node<'on_disk> {
359 369 pub(super) data: NodeData,
360 370
361 371 pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
362 372
363 373 pub(super) children: ChildNodes<'on_disk>,
364 374
365 375 /// How many (non-inclusive) descendants of this node have an entry.
366 376 pub(super) descendants_with_entry_count: u32,
367 377
368 378 /// How many (non-inclusive) descendants of this node have an entry whose
369 379 /// state is "tracked".
370 380 pub(super) tracked_descendants_count: u32,
371 381 }
372 382
373 383 pub(super) enum NodeData {
374 384 Entry(DirstateEntry),
375 385 CachedDirectory { mtime: on_disk::Timestamp },
376 386 None,
377 387 }
378 388
379 389 impl Default for NodeData {
380 390 fn default() -> Self {
381 391 NodeData::None
382 392 }
383 393 }
384 394
385 395 impl NodeData {
386 396 fn has_entry(&self) -> bool {
387 397 match self {
388 398 NodeData::Entry(_) => true,
389 399 _ => false,
390 400 }
391 401 }
392 402
393 403 fn as_entry(&self) -> Option<&DirstateEntry> {
394 404 match self {
395 405 NodeData::Entry(entry) => Some(entry),
396 406 _ => None,
397 407 }
398 408 }
399 409 }
400 410
401 411 impl<'on_disk> DirstateMap<'on_disk> {
402 412 pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
403 413 Self {
404 414 on_disk,
405 415 root: ChildNodes::default(),
406 416 nodes_with_entry_count: 0,
407 417 nodes_with_copy_source_count: 0,
408 418 ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
419 unreachable_bytes: 0,
409 420 }
410 421 }
411 422
412 423 #[timed]
413 424 pub fn new_v2(
414 425 on_disk: &'on_disk [u8],
415 426 data_size: usize,
416 427 ) -> Result<Self, DirstateError> {
417 428 if let Some(data) = on_disk.get(..data_size) {
418 429 Ok(on_disk::read(data)?)
419 430 } else {
420 431 Err(DirstateV2ParseError.into())
421 432 }
422 433 }
423 434
424 435 #[timed]
425 436 pub fn new_v1(
426 437 on_disk: &'on_disk [u8],
427 438 ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
428 439 let mut map = Self::empty(on_disk);
429 440 if map.on_disk.is_empty() {
430 441 return Ok((map, None));
431 442 }
432 443
433 444 let parents = parse_dirstate_entries(
434 445 map.on_disk,
435 446 |path, entry, copy_source| {
436 447 let tracked = entry.state.is_tracked();
437 448 let node = Self::get_or_insert_node(
438 449 map.on_disk,
450 &mut map.unreachable_bytes,
439 451 &mut map.root,
440 452 path,
441 453 WithBasename::to_cow_borrowed,
442 454 |ancestor| {
443 455 if tracked {
444 456 ancestor.tracked_descendants_count += 1
445 457 }
446 458 ancestor.descendants_with_entry_count += 1
447 459 },
448 460 )?;
449 461 assert!(
450 462 !node.data.has_entry(),
451 463 "duplicate dirstate entry in read"
452 464 );
453 465 assert!(
454 466 node.copy_source.is_none(),
455 467 "duplicate dirstate entry in read"
456 468 );
457 469 node.data = NodeData::Entry(*entry);
458 470 node.copy_source = copy_source.map(Cow::Borrowed);
459 471 map.nodes_with_entry_count += 1;
460 472 if copy_source.is_some() {
461 473 map.nodes_with_copy_source_count += 1
462 474 }
463 475 Ok(())
464 476 },
465 477 )?;
466 478 let parents = Some(parents.clone());
467 479
468 480 Ok((map, parents))
469 481 }
470 482
471 483 /// Assuming dirstate-v2 format, returns whether the next write should
472 484 /// append to the existing data file that contains `self.on_disk` (true),
473 485 /// or create a new data file from scratch (false).
474 486 pub(super) fn write_should_append(&self) -> bool {
475 // Soon this will be a heuristic based on the amount of unreachable
476 // data. For now it’s pseudo-random in order to make tests exercise
477 // both code paths.
478
479 fn bad_rng() -> u32 {
480 std::time::SystemTime::now()
481 .duration_since(std::time::UNIX_EPOCH)
482 .unwrap()
483 .subsec_millis()
484 }
485
486 bad_rng() % 2 == 0
487 let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
488 ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
487 489 }
488 490
489 491 fn get_node<'tree>(
490 492 &'tree self,
491 493 path: &HgPath,
492 494 ) -> Result<Option<NodeRef<'tree, 'on_disk>>, DirstateV2ParseError> {
493 495 let mut children = self.root.as_ref();
494 496 let mut components = path.components();
495 497 let mut component =
496 498 components.next().expect("expected at least one components");
497 499 loop {
498 500 if let Some(child) = children.get(component, self.on_disk)? {
499 501 if let Some(next_component) = components.next() {
500 502 component = next_component;
501 503 children = child.children(self.on_disk)?;
502 504 } else {
503 505 return Ok(Some(child));
504 506 }
505 507 } else {
506 508 return Ok(None);
507 509 }
508 510 }
509 511 }
510 512
511 513 /// Returns a mutable reference to the node at `path` if it exists
512 514 ///
513 515 /// This takes `root` instead of `&mut self` so that callers can mutate
514 516 /// other fields while the returned borrow is still valid
515 517 fn get_node_mut<'tree>(
516 518 on_disk: &'on_disk [u8],
519 unreachable_bytes: &mut u32,
517 520 root: &'tree mut ChildNodes<'on_disk>,
518 521 path: &HgPath,
519 522 ) -> Result<Option<&'tree mut Node<'on_disk>>, DirstateV2ParseError> {
520 523 let mut children = root;
521 524 let mut components = path.components();
522 525 let mut component =
523 526 components.next().expect("expected at least one components");
524 527 loop {
525 if let Some(child) = children.make_mut(on_disk)?.get_mut(component)
528 if let Some(child) = children
529 .make_mut(on_disk, unreachable_bytes)?
530 .get_mut(component)
526 531 {
527 532 if let Some(next_component) = components.next() {
528 533 component = next_component;
529 534 children = &mut child.children;
530 535 } else {
531 536 return Ok(Some(child));
532 537 }
533 538 } else {
534 539 return Ok(None);
535 540 }
536 541 }
537 542 }
538 543
539 544 pub(super) fn get_or_insert<'tree, 'path>(
540 545 &'tree mut self,
541 546 path: &HgPath,
542 547 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
543 548 Self::get_or_insert_node(
544 549 self.on_disk,
550 &mut self.unreachable_bytes,
545 551 &mut self.root,
546 552 path,
547 553 WithBasename::to_cow_owned,
548 554 |_| {},
549 555 )
550 556 }
551 557
552 pub(super) fn get_or_insert_node<'tree, 'path>(
558 fn get_or_insert_node<'tree, 'path>(
553 559 on_disk: &'on_disk [u8],
560 unreachable_bytes: &mut u32,
554 561 root: &'tree mut ChildNodes<'on_disk>,
555 562 path: &'path HgPath,
556 563 to_cow: impl Fn(
557 564 WithBasename<&'path HgPath>,
558 565 ) -> WithBasename<Cow<'on_disk, HgPath>>,
559 566 mut each_ancestor: impl FnMut(&mut Node),
560 567 ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> {
561 568 let mut child_nodes = root;
562 569 let mut inclusive_ancestor_paths =
563 570 WithBasename::inclusive_ancestors_of(path);
564 571 let mut ancestor_path = inclusive_ancestor_paths
565 572 .next()
566 573 .expect("expected at least one inclusive ancestor");
567 574 loop {
568 575 // TODO: can we avoid allocating an owned key in cases where the
569 576 // map already contains that key, without introducing double
570 577 // lookup?
571 578 let child_node = child_nodes
572 .make_mut(on_disk)?
579 .make_mut(on_disk, unreachable_bytes)?
573 580 .entry(to_cow(ancestor_path))
574 581 .or_default();
575 582 if let Some(next) = inclusive_ancestor_paths.next() {
576 583 each_ancestor(child_node);
577 584 ancestor_path = next;
578 585 child_nodes = &mut child_node.children;
579 586 } else {
580 587 return Ok(child_node);
581 588 }
582 589 }
583 590 }
584 591
585 592 fn add_or_remove_file(
586 593 &mut self,
587 594 path: &HgPath,
588 595 old_state: EntryState,
589 596 new_entry: DirstateEntry,
590 597 ) -> Result<(), DirstateV2ParseError> {
591 598 let had_entry = old_state != EntryState::Unknown;
592 599 let tracked_count_increment =
593 600 match (old_state.is_tracked(), new_entry.state.is_tracked()) {
594 601 (false, true) => 1,
595 602 (true, false) => -1,
596 603 _ => 0,
597 604 };
598 605
599 606 let node = Self::get_or_insert_node(
600 607 self.on_disk,
608 &mut self.unreachable_bytes,
601 609 &mut self.root,
602 610 path,
603 611 WithBasename::to_cow_owned,
604 612 |ancestor| {
605 613 if !had_entry {
606 614 ancestor.descendants_with_entry_count += 1;
607 615 }
608 616
609 617 // We can’t use `+= increment` because the counter is unsigned,
610 618 // and we want debug builds to detect accidental underflow
611 619 // through zero
612 620 match tracked_count_increment {
613 621 1 => ancestor.tracked_descendants_count += 1,
614 622 -1 => ancestor.tracked_descendants_count -= 1,
615 623 _ => {}
616 624 }
617 625 },
618 626 )?;
619 627 if !had_entry {
620 628 self.nodes_with_entry_count += 1
621 629 }
622 630 node.data = NodeData::Entry(new_entry);
623 631 Ok(())
624 632 }
625 633
626 634 fn iter_nodes<'tree>(
627 635 &'tree self,
628 636 ) -> impl Iterator<
629 637 Item = Result<NodeRef<'tree, 'on_disk>, DirstateV2ParseError>,
630 638 > + 'tree {
631 639 // Depth first tree traversal.
632 640 //
633 641 // If we could afford internal iteration and recursion,
634 642 // this would look like:
635 643 //
636 644 // ```
637 645 // fn traverse_children(
638 646 // children: &ChildNodes,
639 647 // each: &mut impl FnMut(&Node),
640 648 // ) {
641 649 // for child in children.values() {
642 650 // traverse_children(&child.children, each);
643 651 // each(child);
644 652 // }
645 653 // }
646 654 // ```
647 655 //
648 656 // However we want an external iterator and therefore can’t use the
649 657 // call stack. Use an explicit stack instead:
650 658 let mut stack = Vec::new();
651 659 let mut iter = self.root.as_ref().iter();
652 660 std::iter::from_fn(move || {
653 661 while let Some(child_node) = iter.next() {
654 662 let children = match child_node.children(self.on_disk) {
655 663 Ok(children) => children,
656 664 Err(error) => return Some(Err(error)),
657 665 };
658 666 // Pseudo-recursion
659 667 let new_iter = children.iter();
660 668 let old_iter = std::mem::replace(&mut iter, new_iter);
661 669 stack.push((child_node, old_iter));
662 670 }
663 671 // Found the end of a `children.iter()` iterator.
664 672 if let Some((child_node, next_iter)) = stack.pop() {
665 673 // "Return" from pseudo-recursion by restoring state from the
666 674 // explicit stack
667 675 iter = next_iter;
668 676
669 677 Some(Ok(child_node))
670 678 } else {
671 679 // Reached the bottom of the stack, we’re done
672 680 None
673 681 }
674 682 })
675 683 }
676 684
677 685 fn clear_known_ambiguous_mtimes(
678 686 &mut self,
679 687 paths: &[impl AsRef<HgPath>],
680 688 ) -> Result<(), DirstateV2ParseError> {
681 689 for path in paths {
682 690 if let Some(node) = Self::get_node_mut(
683 691 self.on_disk,
692 &mut self.unreachable_bytes,
684 693 &mut self.root,
685 694 path.as_ref(),
686 695 )? {
687 696 if let NodeData::Entry(entry) = &mut node.data {
688 697 entry.clear_mtime();
689 698 }
690 699 }
691 700 }
692 701 Ok(())
693 702 }
694 703
695 704 /// Return a faillilble iterator of full paths of nodes that have an
696 705 /// `entry` for which the given `predicate` returns true.
697 706 ///
698 707 /// Fallibility means that each iterator item is a `Result`, which may
699 708 /// indicate a parse error of the on-disk dirstate-v2 format. Such errors
700 709 /// should only happen if Mercurial is buggy or a repository is corrupted.
701 710 fn filter_full_paths<'tree>(
702 711 &'tree self,
703 712 predicate: impl Fn(&DirstateEntry) -> bool + 'tree,
704 713 ) -> impl Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + 'tree
705 714 {
706 715 filter_map_results(self.iter_nodes(), move |node| {
707 716 if let Some(entry) = node.entry()? {
708 717 if predicate(&entry) {
709 718 return Ok(Some(node.full_path(self.on_disk)?));
710 719 }
711 720 }
712 721 Ok(None)
713 722 })
714 723 }
724
725 fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
726 if let Cow::Borrowed(path) = path {
727 *unreachable_bytes += path.len() as u32
728 }
729 }
715 730 }
716 731
/// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
///
/// The callback is only called for incoming `Ok` values. Errors are passed
/// through as-is. In order to let it use the `?` operator the callback is
/// expected to return a `Result` of `Option`, instead of an `Option` of
/// `Result`.
fn filter_map_results<'a, I, F, A, B, E>(
    iter: I,
    f: F,
) -> impl Iterator<Item = Result<B, E>> + 'a
where
    I: Iterator<Item = Result<A, E>> + 'a,
    F: Fn(A) -> Result<Option<B>, E> + 'a,
{
    iter.filter_map(move |result| match result {
        Ok(item) => f(item).transpose(),
        Err(error) => Some(Err(error)),
    })
}
736 751
737 752 impl<'on_disk> super::dispatch::DirstateMapMethods for DirstateMap<'on_disk> {
738 753 fn clear(&mut self) {
739 754 self.root = Default::default();
740 755 self.nodes_with_entry_count = 0;
741 756 self.nodes_with_copy_source_count = 0;
742 757 }
743 758
744 759 fn add_file(
745 760 &mut self,
746 761 filename: &HgPath,
747 762 entry: DirstateEntry,
748 763 added: bool,
749 764 merged: bool,
750 765 from_p2: bool,
751 766 possibly_dirty: bool,
752 767 ) -> Result<(), DirstateError> {
753 768 let mut entry = entry;
754 769 if added {
755 770 assert!(!possibly_dirty);
756 771 assert!(!from_p2);
757 772 entry.state = EntryState::Added;
758 773 entry.size = SIZE_NON_NORMAL;
759 774 entry.mtime = MTIME_UNSET;
760 775 } else if merged {
761 776 assert!(!possibly_dirty);
762 777 assert!(!from_p2);
763 778 entry.state = EntryState::Merged;
764 779 entry.size = SIZE_FROM_OTHER_PARENT;
765 780 entry.mtime = MTIME_UNSET;
766 781 } else if from_p2 {
767 782 assert!(!possibly_dirty);
768 783 entry.state = EntryState::Normal;
769 784 entry.size = SIZE_FROM_OTHER_PARENT;
770 785 entry.mtime = MTIME_UNSET;
771 786 } else if possibly_dirty {
772 787 entry.state = EntryState::Normal;
773 788 entry.size = SIZE_NON_NORMAL;
774 789 entry.mtime = MTIME_UNSET;
775 790 } else {
776 791 entry.state = EntryState::Normal;
777 792 entry.size = entry.size & V1_RANGEMASK;
778 793 entry.mtime = entry.mtime & V1_RANGEMASK;
779 794 }
780 795
781 796 let old_state = match self.get(filename)? {
782 797 Some(e) => e.state,
783 798 None => EntryState::Unknown,
784 799 };
785 800
786 801 Ok(self.add_or_remove_file(filename, old_state, entry)?)
787 802 }
788 803
789 804 fn remove_file(
790 805 &mut self,
791 806 filename: &HgPath,
792 807 in_merge: bool,
793 808 ) -> Result<(), DirstateError> {
794 809 let old_entry_opt = self.get(filename)?;
795 810 let old_state = match old_entry_opt {
796 811 Some(e) => e.state,
797 812 None => EntryState::Unknown,
798 813 };
799 814 let mut size = 0;
800 815 if in_merge {
801 816 // XXX we should not be able to have 'm' state and 'FROM_P2' if not
802 817 // during a merge. So I (marmoute) am not sure we need the
803 818 // conditionnal at all. Adding double checking this with assert
804 819 // would be nice.
805 820 if let Some(old_entry) = old_entry_opt {
806 821 // backup the previous state
807 822 if old_entry.state == EntryState::Merged {
808 823 size = SIZE_NON_NORMAL;
809 824 } else if old_entry.state == EntryState::Normal
810 825 && old_entry.size == SIZE_FROM_OTHER_PARENT
811 826 {
812 827 // other parent
813 828 size = SIZE_FROM_OTHER_PARENT;
814 829 }
815 830 }
816 831 }
817 832 if size == 0 {
818 833 self.copy_map_remove(filename)?;
819 834 }
820 835 let entry = DirstateEntry {
821 836 state: EntryState::Removed,
822 837 mode: 0,
823 838 size,
824 839 mtime: 0,
825 840 };
826 841 Ok(self.add_or_remove_file(filename, old_state, entry)?)
827 842 }
828 843
829 844 fn drop_file(&mut self, filename: &HgPath) -> Result<bool, DirstateError> {
830 845 let old_state = match self.get(filename)? {
831 846 Some(e) => e.state,
832 847 None => EntryState::Unknown,
833 848 };
834 849 struct Dropped {
835 850 was_tracked: bool,
836 851 had_entry: bool,
837 852 had_copy_source: bool,
838 853 }
839 854
840 855 /// If this returns `Ok(Some((dropped, removed)))`, then
841 856 ///
842 857 /// * `dropped` is about the leaf node that was at `filename`
843 858 /// * `removed` is whether this particular level of recursion just
844 859 /// removed a node in `nodes`.
845 860 fn recur<'on_disk>(
846 861 on_disk: &'on_disk [u8],
862 unreachable_bytes: &mut u32,
847 863 nodes: &mut ChildNodes<'on_disk>,
848 864 path: &HgPath,
849 865 ) -> Result<Option<(Dropped, bool)>, DirstateV2ParseError> {
850 866 let (first_path_component, rest_of_path) =
851 867 path.split_first_component();
852 let node = if let Some(node) =
853 nodes.make_mut(on_disk)?.get_mut(first_path_component)
868 let nodes = nodes.make_mut(on_disk, unreachable_bytes)?;
869 let node = if let Some(node) = nodes.get_mut(first_path_component)
854 870 {
855 871 node
856 872 } else {
857 873 return Ok(None);
858 874 };
859 875 let dropped;
860 876 if let Some(rest) = rest_of_path {
861 if let Some((d, removed)) =
862 recur(on_disk, &mut node.children, rest)?
863 {
877 if let Some((d, removed)) = recur(
878 on_disk,
879 unreachable_bytes,
880 &mut node.children,
881 rest,
882 )? {
864 883 dropped = d;
865 884 if dropped.had_entry {
866 885 node.descendants_with_entry_count -= 1;
867 886 }
868 887 if dropped.was_tracked {
869 888 node.tracked_descendants_count -= 1;
870 889 }
871 890
872 891 // Directory caches must be invalidated when removing a
873 892 // child node
874 893 if removed {
875 894 if let NodeData::CachedDirectory { .. } = &node.data {
876 895 node.data = NodeData::None
877 896 }
878 897 }
879 898 } else {
880 899 return Ok(None);
881 900 }
882 901 } else {
883 902 let had_entry = node.data.has_entry();
884 903 if had_entry {
885 904 node.data = NodeData::None
886 905 }
906 if let Some(source) = &node.copy_source {
907 DirstateMap::count_dropped_path(unreachable_bytes, source)
908 }
887 909 dropped = Dropped {
888 910 was_tracked: node
889 911 .data
890 912 .as_entry()
891 913 .map_or(false, |entry| entry.state.is_tracked()),
892 914 had_entry,
893 915 had_copy_source: node.copy_source.take().is_some(),
894 916 };
895 917 }
896 918 // After recursion, for both leaf (rest_of_path is None) nodes and
897 919 // parent nodes, remove a node if it just became empty.
898 920 let remove = !node.data.has_entry()
899 921 && node.copy_source.is_none()
900 922 && node.children.is_empty();
901 923 if remove {
902 nodes.make_mut(on_disk)?.remove(first_path_component);
924 let (key, _) =
925 nodes.remove_entry(first_path_component).unwrap();
926 DirstateMap::count_dropped_path(
927 unreachable_bytes,
928 key.full_path(),
929 )
903 930 }
904 931 Ok(Some((dropped, remove)))
905 932 }
906 933
907 if let Some((dropped, _removed)) =
908 recur(self.on_disk, &mut self.root, filename)?
909 {
934 if let Some((dropped, _removed)) = recur(
935 self.on_disk,
936 &mut self.unreachable_bytes,
937 &mut self.root,
938 filename,
939 )? {
910 940 if dropped.had_entry {
911 941 self.nodes_with_entry_count -= 1
912 942 }
913 943 if dropped.had_copy_source {
914 944 self.nodes_with_copy_source_count -= 1
915 945 }
916 946 Ok(dropped.had_entry)
917 947 } else {
918 948 debug_assert!(!old_state.is_tracked());
919 949 Ok(false)
920 950 }
921 951 }
922 952
923 953 fn clear_ambiguous_times(
924 954 &mut self,
925 955 filenames: Vec<HgPathBuf>,
926 956 now: i32,
927 957 ) -> Result<(), DirstateV2ParseError> {
928 958 for filename in filenames {
929 if let Some(node) =
930 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
931 {
959 if let Some(node) = Self::get_node_mut(
960 self.on_disk,
961 &mut self.unreachable_bytes,
962 &mut self.root,
963 &filename,
964 )? {
932 965 if let NodeData::Entry(entry) = &mut node.data {
933 966 entry.clear_ambiguous_mtime(now);
934 967 }
935 968 }
936 969 }
937 970 Ok(())
938 971 }
939 972
940 973 fn non_normal_entries_contains(
941 974 &mut self,
942 975 key: &HgPath,
943 976 ) -> Result<bool, DirstateV2ParseError> {
944 977 Ok(if let Some(node) = self.get_node(key)? {
945 978 node.entry()?.map_or(false, |entry| entry.is_non_normal())
946 979 } else {
947 980 false
948 981 })
949 982 }
950 983
951 984 fn non_normal_entries_remove(&mut self, _key: &HgPath) {
952 985 // Do nothing, this `DirstateMap` does not have a separate "non normal
953 986 // entries" set that need to be kept up to date
954 987 }
955 988
956 989 fn non_normal_or_other_parent_paths(
957 990 &mut self,
958 991 ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
959 992 {
960 993 Box::new(self.filter_full_paths(|entry| {
961 994 entry.is_non_normal() || entry.is_from_other_parent()
962 995 }))
963 996 }
964 997
965 998 fn set_non_normal_other_parent_entries(&mut self, _force: bool) {
966 999 // Do nothing, this `DirstateMap` does not have a separate "non normal
967 1000 // entries" and "from other parent" sets that need to be recomputed
968 1001 }
969 1002
970 1003 fn iter_non_normal_paths(
971 1004 &mut self,
972 1005 ) -> Box<
973 1006 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
974 1007 > {
975 1008 self.iter_non_normal_paths_panic()
976 1009 }
977 1010
978 1011 fn iter_non_normal_paths_panic(
979 1012 &self,
980 1013 ) -> Box<
981 1014 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
982 1015 > {
983 1016 Box::new(self.filter_full_paths(|entry| entry.is_non_normal()))
984 1017 }
985 1018
986 1019 fn iter_other_parent_paths(
987 1020 &mut self,
988 1021 ) -> Box<
989 1022 dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + Send + '_,
990 1023 > {
991 1024 Box::new(self.filter_full_paths(|entry| entry.is_from_other_parent()))
992 1025 }
993 1026
994 1027 fn has_tracked_dir(
995 1028 &mut self,
996 1029 directory: &HgPath,
997 1030 ) -> Result<bool, DirstateError> {
998 1031 if let Some(node) = self.get_node(directory)? {
999 1032 // A node without a `DirstateEntry` was created to hold child
1000 1033 // nodes, and is therefore a directory.
1001 1034 let state = node.state()?;
1002 1035 Ok(state.is_none() && node.tracked_descendants_count() > 0)
1003 1036 } else {
1004 1037 Ok(false)
1005 1038 }
1006 1039 }
1007 1040
1008 1041 fn has_dir(&mut self, directory: &HgPath) -> Result<bool, DirstateError> {
1009 1042 if let Some(node) = self.get_node(directory)? {
1010 1043 // A node without a `DirstateEntry` was created to hold child
1011 1044 // nodes, and is therefore a directory.
1012 1045 let state = node.state()?;
1013 1046 Ok(state.is_none() && node.descendants_with_entry_count() > 0)
1014 1047 } else {
1015 1048 Ok(false)
1016 1049 }
1017 1050 }
1018 1051
1019 1052 #[timed]
1020 1053 fn pack_v1(
1021 1054 &mut self,
1022 1055 parents: DirstateParents,
1023 1056 now: Timestamp,
1024 1057 ) -> Result<Vec<u8>, DirstateError> {
1025 1058 let now: i32 = now.0.try_into().expect("time overflow");
1026 1059 let mut ambiguous_mtimes = Vec::new();
1027 1060 // Optizimation (to be measured?): pre-compute size to avoid `Vec`
1028 1061 // reallocations
1029 1062 let mut size = parents.as_bytes().len();
1030 1063 for node in self.iter_nodes() {
1031 1064 let node = node?;
1032 1065 if let Some(entry) = node.entry()? {
1033 1066 size += packed_entry_size(
1034 1067 node.full_path(self.on_disk)?,
1035 1068 node.copy_source(self.on_disk)?,
1036 1069 );
1037 1070 if entry.mtime_is_ambiguous(now) {
1038 1071 ambiguous_mtimes.push(
1039 1072 node.full_path_borrowed(self.on_disk)?
1040 1073 .detach_from_tree(),
1041 1074 )
1042 1075 }
1043 1076 }
1044 1077 }
1045 1078 self.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
1046 1079
1047 1080 let mut packed = Vec::with_capacity(size);
1048 1081 packed.extend(parents.as_bytes());
1049 1082
1050 1083 for node in self.iter_nodes() {
1051 1084 let node = node?;
1052 1085 if let Some(entry) = node.entry()? {
1053 1086 pack_entry(
1054 1087 node.full_path(self.on_disk)?,
1055 1088 &entry,
1056 1089 node.copy_source(self.on_disk)?,
1057 1090 &mut packed,
1058 1091 );
1059 1092 }
1060 1093 }
1061 1094 Ok(packed)
1062 1095 }
1063 1096
1064 1097 /// Returns new data together with whether that data should be appended to
1065 1098 /// the existing data file whose content is at `self.on_disk` (true),
1066 1099 /// instead of written to a new data file (false).
1067 1100 #[timed]
1068 1101 fn pack_v2(
1069 1102 &mut self,
1070 1103 now: Timestamp,
1071 1104 can_append: bool,
1072 1105 ) -> Result<(Vec<u8>, bool), DirstateError> {
1073 1106 // TODO: how do we want to handle this in 2038?
1074 1107 let now: i32 = now.0.try_into().expect("time overflow");
1075 1108 let mut paths = Vec::new();
1076 1109 for node in self.iter_nodes() {
1077 1110 let node = node?;
1078 1111 if let Some(entry) = node.entry()? {
1079 1112 if entry.mtime_is_ambiguous(now) {
1080 1113 paths.push(
1081 1114 node.full_path_borrowed(self.on_disk)?
1082 1115 .detach_from_tree(),
1083 1116 )
1084 1117 }
1085 1118 }
1086 1119 }
1087 1120 // Borrow of `self` ends here since we collect cloned paths
1088 1121
1089 1122 self.clear_known_ambiguous_mtimes(&paths)?;
1090 1123
1091 1124 on_disk::write(self, can_append)
1092 1125 }
1093 1126
1094 1127 fn status<'a>(
1095 1128 &'a mut self,
1096 1129 matcher: &'a (dyn Matcher + Sync),
1097 1130 root_dir: PathBuf,
1098 1131 ignore_files: Vec<PathBuf>,
1099 1132 options: StatusOptions,
1100 1133 ) -> Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>
1101 1134 {
1102 1135 super::status::status(self, matcher, root_dir, ignore_files, options)
1103 1136 }
1104 1137
1105 1138 fn copy_map_len(&self) -> usize {
1106 1139 self.nodes_with_copy_source_count as usize
1107 1140 }
1108 1141
1109 1142 fn copy_map_iter(&self) -> CopyMapIter<'_> {
1110 1143 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1111 1144 Ok(if let Some(source) = node.copy_source(self.on_disk)? {
1112 1145 Some((node.full_path(self.on_disk)?, source))
1113 1146 } else {
1114 1147 None
1115 1148 })
1116 1149 }))
1117 1150 }
1118 1151
1119 1152 fn copy_map_contains_key(
1120 1153 &self,
1121 1154 key: &HgPath,
1122 1155 ) -> Result<bool, DirstateV2ParseError> {
1123 1156 Ok(if let Some(node) = self.get_node(key)? {
1124 1157 node.has_copy_source()
1125 1158 } else {
1126 1159 false
1127 1160 })
1128 1161 }
1129 1162
1130 1163 fn copy_map_get(
1131 1164 &self,
1132 1165 key: &HgPath,
1133 1166 ) -> Result<Option<&HgPath>, DirstateV2ParseError> {
1134 1167 if let Some(node) = self.get_node(key)? {
1135 1168 if let Some(source) = node.copy_source(self.on_disk)? {
1136 1169 return Ok(Some(source));
1137 1170 }
1138 1171 }
1139 1172 Ok(None)
1140 1173 }
1141 1174
1142 1175 fn copy_map_remove(
1143 1176 &mut self,
1144 1177 key: &HgPath,
1145 1178 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1146 1179 let count = &mut self.nodes_with_copy_source_count;
1147 Ok(
1148 Self::get_node_mut(self.on_disk, &mut self.root, key)?.and_then(
1149 |node| {
1150 if node.copy_source.is_some() {
1151 *count -= 1
1152 }
1153 node.copy_source.take().map(Cow::into_owned)
1154 },
1155 ),
1156 )
1180 let unreachable_bytes = &mut self.unreachable_bytes;
1181 Ok(Self::get_node_mut(
1182 self.on_disk,
1183 unreachable_bytes,
1184 &mut self.root,
1185 key,
1186 )?
1187 .and_then(|node| {
1188 if let Some(source) = &node.copy_source {
1189 *count -= 1;
1190 Self::count_dropped_path(unreachable_bytes, source);
1191 }
1192 node.copy_source.take().map(Cow::into_owned)
1193 }))
1157 1194 }
1158 1195
1159 1196 fn copy_map_insert(
1160 1197 &mut self,
1161 1198 key: HgPathBuf,
1162 1199 value: HgPathBuf,
1163 1200 ) -> Result<Option<HgPathBuf>, DirstateV2ParseError> {
1164 1201 let node = Self::get_or_insert_node(
1165 1202 self.on_disk,
1203 &mut self.unreachable_bytes,
1166 1204 &mut self.root,
1167 1205 &key,
1168 1206 WithBasename::to_cow_owned,
1169 1207 |_ancestor| {},
1170 1208 )?;
1171 1209 if node.copy_source.is_none() {
1172 1210 self.nodes_with_copy_source_count += 1
1173 1211 }
1174 1212 Ok(node.copy_source.replace(value.into()).map(Cow::into_owned))
1175 1213 }
1176 1214
1177 1215 fn len(&self) -> usize {
1178 1216 self.nodes_with_entry_count as usize
1179 1217 }
1180 1218
1181 1219 fn contains_key(
1182 1220 &self,
1183 1221 key: &HgPath,
1184 1222 ) -> Result<bool, DirstateV2ParseError> {
1185 1223 Ok(self.get(key)?.is_some())
1186 1224 }
1187 1225
1188 1226 fn get(
1189 1227 &self,
1190 1228 key: &HgPath,
1191 1229 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
1192 1230 Ok(if let Some(node) = self.get_node(key)? {
1193 1231 node.entry()?
1194 1232 } else {
1195 1233 None
1196 1234 })
1197 1235 }
1198 1236
1199 1237 fn iter(&self) -> StateMapIter<'_> {
1200 1238 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1201 1239 Ok(if let Some(entry) = node.entry()? {
1202 1240 Some((node.full_path(self.on_disk)?, entry))
1203 1241 } else {
1204 1242 None
1205 1243 })
1206 1244 }))
1207 1245 }
1208 1246
1209 1247 fn iter_directories(
1210 1248 &self,
1211 1249 ) -> Box<
1212 1250 dyn Iterator<
1213 1251 Item = Result<
1214 1252 (&HgPath, Option<Timestamp>),
1215 1253 DirstateV2ParseError,
1216 1254 >,
1217 1255 > + Send
1218 1256 + '_,
1219 1257 > {
1220 1258 Box::new(filter_map_results(self.iter_nodes(), move |node| {
1221 1259 Ok(if node.state()?.is_none() {
1222 1260 Some((
1223 1261 node.full_path(self.on_disk)?,
1224 1262 node.cached_directory_mtime()
1225 1263 .map(|mtime| Timestamp(mtime.seconds())),
1226 1264 ))
1227 1265 } else {
1228 1266 None
1229 1267 })
1230 1268 }))
1231 1269 }
1232 1270 }
@@ -1,748 +1,756 b''
1 1 //! The "version 2" disk representation of the dirstate
2 2 //!
3 3 //! # File format
4 4 //!
5 5 //! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts
6 6 //! with a fixed-sized header whose layout is defined by the `DocketHeader`
7 7 //! struct, followed by the data file identifier.
8 8 //!
9 9 //! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
10 10 //! file may be longer than the size given in the docket, but not shorter. Only
11 11 //! the start of the data file up to the given size is considered. The
12 12 //! fixed-size "root" of the dirstate tree whose layout is defined by the
13 13 //! `Root` struct is found at the end of that slice of data.
14 14 //!
15 15 //! Its `root_nodes` field contains the slice (offset and length) to
16 16 //! the nodes representing the files and directories at the root of the
17 17 //! repository. Each node is also fixed-size, defined by the `Node` struct.
18 18 //! Nodes in turn contain slices to variable-size paths, and to their own child
19 19 //! nodes (if any) for nested files and directories.
20 20
21 21 use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
22 22 use crate::dirstate_tree::path_with_basename::WithBasename;
23 23 use crate::errors::HgError;
24 24 use crate::utils::hg_path::HgPath;
25 25 use crate::DirstateEntry;
26 26 use crate::DirstateError;
27 27 use crate::DirstateParents;
28 28 use crate::EntryState;
29 29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
30 30 use bytes_cast::BytesCast;
31 31 use format_bytes::format_bytes;
32 32 use std::borrow::Cow;
33 33 use std::convert::{TryFrom, TryInto};
34 34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
35 35
36 36 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
37 37 /// This a redundant sanity check more than an actual "magic number" since
38 38 /// `.hg/requires` already governs which format should be used.
39 39 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
40 40
41 41 /// Keep space for 256-bit hashes
42 42 const STORED_NODE_ID_BYTES: usize = 32;
43 43
44 44 /// … even though only 160 bits are used for now, with SHA-1
45 45 const USED_NODE_ID_BYTES: usize = 20;
46 46
47 47 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
48 48 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
49 49
50 50 // Must match `HEADER` in `mercurial/dirstateutils/docket.py`
51 51 #[derive(BytesCast)]
52 52 #[repr(C)]
53 53 struct DocketHeader {
54 54 marker: [u8; V2_FORMAT_MARKER.len()],
55 55 parent_1: [u8; STORED_NODE_ID_BYTES],
56 56 parent_2: [u8; STORED_NODE_ID_BYTES],
57 57
58 58 /// Counted in bytes
59 59 data_size: Size,
60 60
61 61 uuid_size: u8,
62 62 }
63 63
64 64 pub struct Docket<'on_disk> {
65 65 header: &'on_disk DocketHeader,
66 66 uuid: &'on_disk [u8],
67 67 }
68 68
69 69 #[derive(BytesCast)]
70 70 #[repr(C)]
71 71 struct Root {
72 72 root_nodes: ChildNodes,
73 73 nodes_with_entry_count: Size,
74 74 nodes_with_copy_source_count: Size,
75 75
76 /// How many bytes of this data file are not used anymore
77 unreachable_bytes: Size,
78
76 79 /// If non-zero, a hash of ignore files that were used for some previous
77 80 /// run of the `status` algorithm.
78 81 ///
79 82 /// We define:
80 83 ///
81 84 /// * "Root" ignore files are `.hgignore` at the root of the repository if
82 85 /// it exists, and files from `ui.ignore.*` config. This set of files is
83 86 /// then sorted by the string representation of their path.
84 87 /// * The "expanded contents" of an ignore files is the byte string made
85 88 /// by concatenating its contents with the "expanded contents" of other
86 89 /// files included with `include:` or `subinclude:` files, in inclusion
87 90 /// order. This definition is recursive, as included files can
88 91 /// themselves include more files.
89 92 ///
90 93 /// This hash is defined as the SHA-1 of the concatenation (in sorted
91 94 /// order) of the "expanded contents" of each "root" ignore file.
92 95 /// (Note that computing this does not require actually concatenating byte
93 96 /// strings into contiguous memory, instead SHA-1 hashing can be done
94 97 /// incrementally.)
95 98 ignore_patterns_hash: IgnorePatternsHash,
96 99 }
97 100
98 101 #[derive(BytesCast)]
99 102 #[repr(C)]
100 103 pub(super) struct Node {
101 104 full_path: PathSlice,
102 105
103 106 /// In bytes from `self.full_path.start`
104 107 base_name_start: PathSize,
105 108
106 109 copy_source: OptPathSlice,
107 110 children: ChildNodes,
108 111 pub(super) descendants_with_entry_count: Size,
109 112 pub(super) tracked_descendants_count: Size,
110 113
111 114 /// Depending on the value of `state`:
112 115 ///
113 116 /// * A null byte: `data` is not used.
114 117 ///
115 118 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
116 119 /// represent a dirstate entry like in the v1 format.
117 120 ///
118 121 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
119 122 /// as the `Timestamp` for the mtime of a cached directory.
120 123 ///
121 124 /// The presence of this state means that at some point, this path in
122 125 /// the working directory was observed:
123 126 ///
124 127 /// - To be a directory
125 128 /// - With the modification time as given by `Timestamp`
126 129 /// - That timestamp was already strictly in the past when observed,
127 130 /// meaning that later changes cannot happen in the same clock tick
128 131 /// and must cause a different modification time (unless the system
129 132 /// clock jumps back and we get unlucky, which is not impossible but
130 133 /// but deemed unlikely enough).
131 134 /// - All direct children of this directory (as returned by
132 135 /// `std::fs::read_dir`) either have a corresponding dirstate node, or
133 136 /// are ignored by ignore patterns whose hash is in
134 137 /// `Root::ignore_patterns_hash`.
135 138 ///
136 139 /// This means that if `std::fs::symlink_metadata` later reports the
137 140 /// same modification time and ignored patterns haven’t changed, a run
138 141 /// of status that is not listing ignored files can skip calling
139 142 /// `std::fs::read_dir` again for this directory, iterate child
140 143 /// dirstate nodes instead.
141 144 state: u8,
142 145 data: Entry,
143 146 }
144 147
145 148 #[derive(BytesCast, Copy, Clone)]
146 149 #[repr(C)]
147 150 struct Entry {
148 151 mode: I32Be,
149 152 mtime: I32Be,
150 153 size: I32Be,
151 154 }
152 155
153 156 /// Duration since the Unix epoch
154 157 #[derive(BytesCast, Copy, Clone, PartialEq)]
155 158 #[repr(C)]
156 159 pub(super) struct Timestamp {
157 160 seconds: I64Be,
158 161
159 162 /// In `0 .. 1_000_000_000`.
160 163 ///
161 164 /// This timestamp is later or earlier than `(seconds, 0)` by this many
162 165 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
163 166 nanoseconds: U32Be,
164 167 }
165 168
166 169 /// Counted in bytes from the start of the file
167 170 ///
168 171 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
169 172 type Offset = U32Be;
170 173
171 174 /// Counted in number of items
172 175 ///
173 176 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
174 177 type Size = U32Be;
175 178
176 179 /// Counted in bytes
177 180 ///
178 181 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
179 182 type PathSize = U16Be;
180 183
181 184 /// A contiguous sequence of `len` times `Node`, representing the child nodes
182 185 /// of either some other node or of the repository root.
183 186 ///
184 187 /// Always sorted by ascending `full_path`, to allow binary search.
185 188 /// Since nodes with the same parent nodes also have the same parent path,
186 189 /// only the `base_name`s need to be compared during binary search.
187 190 #[derive(BytesCast, Copy, Clone)]
188 191 #[repr(C)]
189 192 struct ChildNodes {
190 193 start: Offset,
191 194 len: Size,
192 195 }
193 196
194 197 /// A `HgPath` of `len` bytes
195 198 #[derive(BytesCast, Copy, Clone)]
196 199 #[repr(C)]
197 200 struct PathSlice {
198 201 start: Offset,
199 202 len: PathSize,
200 203 }
201 204
202 205 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
203 206 type OptPathSlice = PathSlice;
204 207
205 208 /// Make sure that size-affecting changes are made knowingly
206 209 fn _static_assert_size_of() {
207 210 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
208 let _ = std::mem::transmute::<Root, [u8; 36]>;
211 let _ = std::mem::transmute::<Root, [u8; 40]>;
209 212 let _ = std::mem::transmute::<Node, [u8; 43]>;
210 213 }
211 214
212 215 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
213 216 ///
214 217 /// This should only happen if Mercurial is buggy or a repository is corrupted.
215 218 #[derive(Debug)]
216 219 pub struct DirstateV2ParseError;
217 220
218 221 impl From<DirstateV2ParseError> for HgError {
219 222 fn from(_: DirstateV2ParseError) -> Self {
220 223 HgError::corrupted("dirstate-v2 parse error")
221 224 }
222 225 }
223 226
224 227 impl From<DirstateV2ParseError> for crate::DirstateError {
225 228 fn from(error: DirstateV2ParseError) -> Self {
226 229 HgError::from(error).into()
227 230 }
228 231 }
229 232
230 233 impl<'on_disk> Docket<'on_disk> {
231 234 pub fn parents(&self) -> DirstateParents {
232 235 use crate::Node;
233 236 let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
234 237 .unwrap()
235 238 .clone();
236 239 let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
237 240 .unwrap()
238 241 .clone();
239 242 DirstateParents { p1, p2 }
240 243 }
241 244
242 245 pub fn data_size(&self) -> usize {
243 246 // This `unwrap` could only panic on a 16-bit CPU
244 247 self.header.data_size.get().try_into().unwrap()
245 248 }
246 249
247 250 pub fn data_filename(&self) -> String {
248 251 String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
249 252 }
250 253 }
251 254
252 255 pub fn read_docket(
253 256 on_disk: &[u8],
254 257 ) -> Result<Docket<'_>, DirstateV2ParseError> {
255 258 let (header, uuid) =
256 259 DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
257 260 let uuid_size = header.uuid_size as usize;
258 261 if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
259 262 Ok(Docket { header, uuid })
260 263 } else {
261 264 Err(DirstateV2ParseError)
262 265 }
263 266 }
264 267
265 268 fn read_root<'on_disk>(
266 269 on_disk: &'on_disk [u8],
267 270 ) -> Result<&'on_disk Root, DirstateV2ParseError> {
268 271 // Find the `Root` at the end of the given slice
269 272 let root_offset = on_disk
270 273 .len()
271 274 .checked_sub(std::mem::size_of::<Root>())
272 275 // A non-empty slice too short is an error
273 276 .ok_or(DirstateV2ParseError)?;
274 277 let (root, _) = Root::from_bytes(&on_disk[root_offset..])
275 278 .map_err(|_| DirstateV2ParseError)?;
276 279 Ok(root)
277 280 }
278 281
279 282 pub(super) fn read<'on_disk>(
280 283 on_disk: &'on_disk [u8],
281 284 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
282 285 if on_disk.is_empty() {
283 286 return Ok(DirstateMap::empty(on_disk));
284 287 }
285 288 let root = read_root(on_disk)?;
289 let mut unreachable_bytes = root.unreachable_bytes.get();
290 // Each append writes a new `Root`, so it’s never reused
291 unreachable_bytes += std::mem::size_of::<Root>() as u32;
286 292 let dirstate_map = DirstateMap {
287 293 on_disk,
288 294 root: dirstate_map::ChildNodes::OnDisk(read_nodes(
289 295 on_disk,
290 296 root.root_nodes,
291 297 )?),
292 298 nodes_with_entry_count: root.nodes_with_entry_count.get(),
293 299 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
294 300 ignore_patterns_hash: root.ignore_patterns_hash,
301 unreachable_bytes,
295 302 };
296 303 Ok(dirstate_map)
297 304 }
298 305
299 306 impl Node {
300 307 pub(super) fn full_path<'on_disk>(
301 308 &self,
302 309 on_disk: &'on_disk [u8],
303 310 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
304 311 read_hg_path(on_disk, self.full_path)
305 312 }
306 313
307 314 pub(super) fn base_name_start<'on_disk>(
308 315 &self,
309 316 ) -> Result<usize, DirstateV2ParseError> {
310 317 let start = self.base_name_start.get();
311 318 if start < self.full_path.len.get() {
312 319 let start = usize::try_from(start)
313 320 // u32 -> usize, could only panic on a 16-bit CPU
314 321 .expect("dirstate-v2 base_name_start out of bounds");
315 322 Ok(start)
316 323 } else {
317 324 Err(DirstateV2ParseError)
318 325 }
319 326 }
320 327
321 328 pub(super) fn base_name<'on_disk>(
322 329 &self,
323 330 on_disk: &'on_disk [u8],
324 331 ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
325 332 let full_path = self.full_path(on_disk)?;
326 333 let base_name_start = self.base_name_start()?;
327 334 Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
328 335 }
329 336
330 337 pub(super) fn path<'on_disk>(
331 338 &self,
332 339 on_disk: &'on_disk [u8],
333 340 ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
334 341 Ok(WithBasename::from_raw_parts(
335 342 Cow::Borrowed(self.full_path(on_disk)?),
336 343 self.base_name_start()?,
337 344 ))
338 345 }
339 346
340 347 pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
341 348 self.copy_source.start.get() != 0
342 349 }
343 350
344 351 pub(super) fn copy_source<'on_disk>(
345 352 &self,
346 353 on_disk: &'on_disk [u8],
347 354 ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
348 355 Ok(if self.has_copy_source() {
349 356 Some(read_hg_path(on_disk, self.copy_source)?)
350 357 } else {
351 358 None
352 359 })
353 360 }
354 361
355 362 pub(super) fn node_data(
356 363 &self,
357 364 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
358 365 let entry = |state| {
359 366 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
360 367 };
361 368
362 369 match self.state {
363 370 b'\0' => Ok(dirstate_map::NodeData::None),
364 371 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
365 372 mtime: *self.data.as_timestamp(),
366 373 }),
367 374 b'n' => Ok(entry(EntryState::Normal)),
368 375 b'a' => Ok(entry(EntryState::Added)),
369 376 b'r' => Ok(entry(EntryState::Removed)),
370 377 b'm' => Ok(entry(EntryState::Merged)),
371 378 _ => Err(DirstateV2ParseError),
372 379 }
373 380 }
374 381
375 382 pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
376 383 if self.state == b'd' {
377 384 Some(self.data.as_timestamp())
378 385 } else {
379 386 None
380 387 }
381 388 }
382 389
383 390 pub(super) fn state(
384 391 &self,
385 392 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
386 393 match self.state {
387 394 b'\0' | b'd' => Ok(None),
388 395 b'n' => Ok(Some(EntryState::Normal)),
389 396 b'a' => Ok(Some(EntryState::Added)),
390 397 b'r' => Ok(Some(EntryState::Removed)),
391 398 b'm' => Ok(Some(EntryState::Merged)),
392 399 _ => Err(DirstateV2ParseError),
393 400 }
394 401 }
395 402
396 403 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
397 404 DirstateEntry {
398 405 state,
399 406 mode: self.data.mode.get(),
400 407 mtime: self.data.mtime.get(),
401 408 size: self.data.size.get(),
402 409 }
403 410 }
404 411
405 412 pub(super) fn entry(
406 413 &self,
407 414 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
408 415 Ok(self
409 416 .state()?
410 417 .map(|state| self.entry_with_given_state(state)))
411 418 }
412 419
413 420 pub(super) fn children<'on_disk>(
414 421 &self,
415 422 on_disk: &'on_disk [u8],
416 423 ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
417 424 read_nodes(on_disk, self.children)
418 425 }
419 426
420 427 pub(super) fn to_in_memory_node<'on_disk>(
421 428 &self,
422 429 on_disk: &'on_disk [u8],
423 430 ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
424 431 Ok(dirstate_map::Node {
425 432 children: dirstate_map::ChildNodes::OnDisk(
426 433 self.children(on_disk)?,
427 434 ),
428 435 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
429 436 data: self.node_data()?,
430 437 descendants_with_entry_count: self
431 438 .descendants_with_entry_count
432 439 .get(),
433 440 tracked_descendants_count: self.tracked_descendants_count.get(),
434 441 })
435 442 }
436 443 }
437 444
438 445 impl Entry {
439 446 fn from_timestamp(timestamp: Timestamp) -> Self {
440 447 // Safety: both types implement the `ByteCast` trait, so we could
441 448 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
442 449 // `transmute` instead makes the compiler check that the two types
443 450 // have the same size, which eliminates the error case of
444 451 // `from_bytes`.
445 452 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
446 453 }
447 454
448 455 fn as_timestamp(&self) -> &Timestamp {
449 456 // Safety: same as above in `from_timestamp`
450 457 unsafe { &*(self as *const Entry as *const Timestamp) }
451 458 }
452 459 }
453 460
454 461 impl Timestamp {
455 462 pub fn seconds(&self) -> i64 {
456 463 self.seconds.get()
457 464 }
458 465 }
459 466
460 467 impl From<SystemTime> for Timestamp {
461 468 fn from(system_time: SystemTime) -> Self {
462 469 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
463 470 Ok(duration) => {
464 471 (duration.as_secs() as i64, duration.subsec_nanos())
465 472 }
466 473 Err(error) => {
467 474 let negative = error.duration();
468 475 (-(negative.as_secs() as i64), negative.subsec_nanos())
469 476 }
470 477 };
471 478 Timestamp {
472 479 seconds: secs.into(),
473 480 nanoseconds: nanos.into(),
474 481 }
475 482 }
476 483 }
477 484
478 485 impl From<&'_ Timestamp> for SystemTime {
479 486 fn from(timestamp: &'_ Timestamp) -> Self {
480 487 let secs = timestamp.seconds.get();
481 488 let nanos = timestamp.nanoseconds.get();
482 489 if secs >= 0 {
483 490 UNIX_EPOCH + Duration::new(secs as u64, nanos)
484 491 } else {
485 492 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
486 493 }
487 494 }
488 495 }
489 496
490 497 fn read_hg_path(
491 498 on_disk: &[u8],
492 499 slice: PathSlice,
493 500 ) -> Result<&HgPath, DirstateV2ParseError> {
494 501 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
495 502 }
496 503
497 504 fn read_nodes(
498 505 on_disk: &[u8],
499 506 slice: ChildNodes,
500 507 ) -> Result<&[Node], DirstateV2ParseError> {
501 508 read_slice(on_disk, slice.start, slice.len.get())
502 509 }
503 510
504 511 fn read_slice<T, Len>(
505 512 on_disk: &[u8],
506 513 start: Offset,
507 514 len: Len,
508 515 ) -> Result<&[T], DirstateV2ParseError>
509 516 where
510 517 T: BytesCast,
511 518 Len: TryInto<usize>,
512 519 {
513 520 // Either `usize::MAX` would result in "out of bounds" error since a single
514 521 // `&[u8]` cannot occupy the entire addess space.
515 522 let start = start.get().try_into().unwrap_or(std::usize::MAX);
516 523 let len = len.try_into().unwrap_or(std::usize::MAX);
517 524 on_disk
518 525 .get(start..)
519 526 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
520 527 .map(|(slice, _rest)| slice)
521 528 .ok_or_else(|| DirstateV2ParseError)
522 529 }
523 530
524 531 pub(crate) fn for_each_tracked_path<'on_disk>(
525 532 on_disk: &'on_disk [u8],
526 533 mut f: impl FnMut(&'on_disk HgPath),
527 534 ) -> Result<(), DirstateV2ParseError> {
528 535 let root = read_root(on_disk)?;
529 536 fn recur<'on_disk>(
530 537 on_disk: &'on_disk [u8],
531 538 nodes: ChildNodes,
532 539 f: &mut impl FnMut(&'on_disk HgPath),
533 540 ) -> Result<(), DirstateV2ParseError> {
534 541 for node in read_nodes(on_disk, nodes)? {
535 542 if let Some(state) = node.state()? {
536 543 if state.is_tracked() {
537 544 f(node.full_path(on_disk)?)
538 545 }
539 546 }
540 547 recur(on_disk, node.children, f)?
541 548 }
542 549 Ok(())
543 550 }
544 551 recur(on_disk, root.root_nodes, &mut f)
545 552 }
546 553
547 554 /// Returns new data together with whether that data should be appended to the
548 555 /// existing data file whose content is at `dirstate_map.on_disk` (true),
549 556 /// instead of written to a new data file (false).
550 557 pub(super) fn write(
551 558 dirstate_map: &mut DirstateMap,
552 559 can_append: bool,
553 560 ) -> Result<(Vec<u8>, bool), DirstateError> {
554 561 let append = can_append && dirstate_map.write_should_append();
555 562
556 563 // This ignores the space for paths, and for nodes without an entry.
557 564 // TODO: better estimate? Skip the `Vec` and write to a file directly?
558 565 let size_guess = std::mem::size_of::<Root>()
559 566 + std::mem::size_of::<Node>()
560 567 * dirstate_map.nodes_with_entry_count as usize;
561 568
562 569 let mut writer = Writer {
563 570 dirstate_map,
564 571 append,
565 572 out: Vec::with_capacity(size_guess),
566 573 };
567 574
568 575 let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;
569 576
570 577 let root = Root {
571 578 root_nodes,
572 579 nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
573 580 nodes_with_copy_source_count: dirstate_map
574 581 .nodes_with_copy_source_count
575 582 .into(),
583 unreachable_bytes: dirstate_map.unreachable_bytes.into(),
576 584 ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
577 585 };
578 586 writer.out.extend(root.as_bytes());
579 587 Ok((writer.out, append))
580 588 }
581 589
582 590 struct Writer<'dmap, 'on_disk> {
583 591 dirstate_map: &'dmap DirstateMap<'on_disk>,
584 592 append: bool,
585 593 out: Vec<u8>,
586 594 }
587 595
588 596 impl Writer<'_, '_> {
589 597 fn write_nodes(
590 598 &mut self,
591 599 nodes: dirstate_map::ChildNodesRef,
592 600 ) -> Result<ChildNodes, DirstateError> {
593 601 // Reuse already-written nodes if possible
594 602 if self.append {
595 603 if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
596 604 let start = self.on_disk_offset_of(nodes_slice).expect(
597 605 "dirstate-v2 OnDisk nodes not found within on_disk",
598 606 );
599 607 let len = child_nodes_len_from_usize(nodes_slice.len());
600 608 return Ok(ChildNodes { start, len });
601 609 }
602 610 }
603 611
604 612 // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
605 613 // undefined iteration order. Sort to enable binary search in the
606 614 // written file.
607 615 let nodes = nodes.sorted();
608 616 let nodes_len = nodes.len();
609 617
610 618 // First accumulate serialized nodes in a `Vec`
611 619 let mut on_disk_nodes = Vec::with_capacity(nodes_len);
612 620 for node in nodes {
613 621 let children =
614 622 self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
615 623 let full_path = node.full_path(self.dirstate_map.on_disk)?;
616 624 let full_path = self.write_path(full_path.as_bytes());
617 625 let copy_source = if let Some(source) =
618 626 node.copy_source(self.dirstate_map.on_disk)?
619 627 {
620 628 self.write_path(source.as_bytes())
621 629 } else {
622 630 PathSlice {
623 631 start: 0.into(),
624 632 len: 0.into(),
625 633 }
626 634 };
627 635 on_disk_nodes.push(match node {
628 636 NodeRef::InMemory(path, node) => {
629 637 let (state, data) = match &node.data {
630 638 dirstate_map::NodeData::Entry(entry) => (
631 639 entry.state.into(),
632 640 Entry {
633 641 mode: entry.mode.into(),
634 642 mtime: entry.mtime.into(),
635 643 size: entry.size.into(),
636 644 },
637 645 ),
638 646 dirstate_map::NodeData::CachedDirectory { mtime } => {
639 647 (b'd', Entry::from_timestamp(*mtime))
640 648 }
641 649 dirstate_map::NodeData::None => (
642 650 b'\0',
643 651 Entry {
644 652 mode: 0.into(),
645 653 mtime: 0.into(),
646 654 size: 0.into(),
647 655 },
648 656 ),
649 657 };
650 658 Node {
651 659 children,
652 660 copy_source,
653 661 full_path,
654 662 base_name_start: u16::try_from(path.base_name_start())
655 663 // Could only panic for paths over 64 KiB
656 664 .expect("dirstate-v2 path length overflow")
657 665 .into(),
658 666 descendants_with_entry_count: node
659 667 .descendants_with_entry_count
660 668 .into(),
661 669 tracked_descendants_count: node
662 670 .tracked_descendants_count
663 671 .into(),
664 672 state,
665 673 data,
666 674 }
667 675 }
668 676 NodeRef::OnDisk(node) => Node {
669 677 children,
670 678 copy_source,
671 679 full_path,
672 680 ..*node
673 681 },
674 682 })
675 683 }
676 684 // … so we can write them contiguously, after writing everything else
677 685 // they refer to.
678 686 let start = self.current_offset();
679 687 let len = child_nodes_len_from_usize(nodes_len);
680 688 self.out.extend(on_disk_nodes.as_bytes());
681 689 Ok(ChildNodes { start, len })
682 690 }
683 691
684 692 /// If the given slice of items is within `on_disk`, returns its offset
685 693 /// from the start of `on_disk`.
686 694 fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
687 695 where
688 696 T: BytesCast,
689 697 {
690 698 fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
691 699 let start = slice.as_ptr() as usize;
692 700 let end = start + slice.len();
693 701 start..=end
694 702 }
695 703 let slice_addresses = address_range(slice.as_bytes());
696 704 let on_disk_addresses = address_range(self.dirstate_map.on_disk);
697 705 if on_disk_addresses.contains(slice_addresses.start())
698 706 && on_disk_addresses.contains(slice_addresses.end())
699 707 {
700 708 let offset = slice_addresses.start() - on_disk_addresses.start();
701 709 Some(offset_from_usize(offset))
702 710 } else {
703 711 None
704 712 }
705 713 }
706 714
707 715 fn current_offset(&mut self) -> Offset {
708 716 let mut offset = self.out.len();
709 717 if self.append {
710 718 offset += self.dirstate_map.on_disk.len()
711 719 }
712 720 offset_from_usize(offset)
713 721 }
714 722
715 723 fn write_path(&mut self, slice: &[u8]) -> PathSlice {
716 724 let len = path_len_from_usize(slice.len());
717 725 // Reuse an already-written path if possible
718 726 if self.append {
719 727 if let Some(start) = self.on_disk_offset_of(slice) {
720 728 return PathSlice { start, len };
721 729 }
722 730 }
723 731 let start = self.current_offset();
724 732 self.out.extend(slice.as_bytes());
725 733 PathSlice { start, len }
726 734 }
727 735 }
728 736
729 737 fn offset_from_usize(x: usize) -> Offset {
730 738 u32::try_from(x)
731 739 // Could only panic for a dirstate file larger than 4 GiB
732 740 .expect("dirstate-v2 offset overflow")
733 741 .into()
734 742 }
735 743
736 744 fn child_nodes_len_from_usize(x: usize) -> Size {
737 745 u32::try_from(x)
738 746 // Could only panic with over 4 billion nodes
739 747 .expect("dirstate-v2 slice length overflow")
740 748 .into()
741 749 }
742 750
743 751 fn path_len_from_usize(x: usize) -> PathSize {
744 752 u16::try_from(x)
745 753 // Could only panic for paths over 64 KiB
746 754 .expect("dirstate-v2 path length overflow")
747 755 .into()
748 756 }
General Comments 0
You need to be logged in to leave comments. Login now