##// END OF EJS Templates
rust: fix clippy lints...
Raphaël Gomès -
r53188:f90796d3 default
parent child Browse files
Show More
@@ -1,847 +1,846
1 1 // ancestors.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9 9
10 10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 11 use crate::dagops;
12 12 use std::cmp::max;
13 13 use std::collections::{BinaryHeap, HashSet};
14 14
15 15 /// Iterator over the ancestors of a given list of revisions
16 16 /// This is a generic type, defined and implemented for any Graph, so that
17 17 /// it's easy to
18 18 ///
19 19 /// - unit test in pure Rust
20 20 /// - bind to main Mercurial code, potentially in several ways and have these
21 21 /// bindings evolve over time
22 22 pub struct AncestorsIterator<G: Graph> {
23 23 graph: G,
24 24 visit: BinaryHeap<Revision>,
25 25 seen: HashSet<Revision>,
26 26 stoprev: Revision,
27 27 }
28 28
29 29 pub struct MissingAncestors<G: Graph> {
30 30 graph: G,
31 31 bases: HashSet<Revision>,
32 32 max_base: Revision,
33 33 }
34 34
35 35 impl<G: Graph> AncestorsIterator<G> {
36 36 /// Constructor.
37 37 ///
38 38 /// if `inclusive` is true, then the init revisions are emitted in
39 39 /// particular, otherwise iteration starts from their parents.
40 40 pub fn new(
41 41 graph: G,
42 42 initrevs: impl IntoIterator<Item = Revision>,
43 43 stoprev: Revision,
44 44 inclusive: bool,
45 45 ) -> Result<Self, GraphError> {
46 46 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
47 47 if inclusive {
48 48 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
49 49 let seen = visit.iter().cloned().collect();
50 50 return Ok(AncestorsIterator {
51 51 visit,
52 52 seen,
53 53 stoprev,
54 54 graph,
55 55 });
56 56 }
57 57 let mut this = AncestorsIterator {
58 58 visit: BinaryHeap::new(),
59 59 seen: HashSet::new(),
60 60 stoprev,
61 61 graph,
62 62 };
63 63 this.seen.insert(NULL_REVISION);
64 64 for rev in filtered_initrevs {
65 65 for parent in this.graph.parents(rev)?.iter().cloned() {
66 66 this.conditionally_push_rev(parent);
67 67 }
68 68 }
69 69 Ok(this)
70 70 }
71 71
72 72 #[inline]
73 73 fn conditionally_push_rev(&mut self, rev: Revision) {
74 74 if self.stoprev <= rev && self.seen.insert(rev) {
75 75 self.visit.push(rev);
76 76 }
77 77 }
78 78
79 79 /// Consumes partially the iterator to tell if the given target
80 80 /// revision
81 81 /// is in the ancestors it emits.
82 82 /// This is meant for iterators actually dedicated to that kind of
83 83 /// purpose
84 84 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
85 85 if self.seen.contains(&target) && target != NULL_REVISION {
86 86 return Ok(true);
87 87 }
88 88 for item in self {
89 89 let rev = item?;
90 90 if rev == target {
91 91 return Ok(true);
92 92 }
93 93 if rev < target {
94 94 return Ok(false);
95 95 }
96 96 }
97 97 Ok(false)
98 98 }
99 99
100 100 pub fn peek(&self) -> Option<Revision> {
101 101 self.visit.peek().cloned()
102 102 }
103 103
104 104 /// Tell if the iterator is about an empty set
105 105 ///
106 106 /// The result does not depend whether the iterator has been consumed
107 107 /// or not.
108 108 /// This is mostly meant for iterators backing a lazy ancestors set
109 109 pub fn is_empty(&self) -> bool {
110 110 if self.visit.len() > 0 {
111 111 return false;
112 112 }
113 113 if self.seen.len() > 1 {
114 114 return false;
115 115 }
116 116 // at this point, the seen set is at most a singleton.
117 117 // If not `self.inclusive`, it's still possible that it has only
118 118 // the null revision
119 119 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
120 120 }
121 121 }
122 122
123 123 /// Main implementation for the iterator
124 124 ///
125 125 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
126 126 /// with a few non crucial differences:
127 127 ///
128 128 /// - there's no filtering of invalid parent revisions. Actually, it should be
129 129 /// consistent and more efficient to filter them from the end caller.
130 130 /// - we don't have the optimization for adjacent revisions (i.e., the case
131 131 /// where `p1 == rev - 1`), because it amounts to update the first element of
132 132 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
133 133 /// - we save a few pushes by comparing with `stoprev` before pushing
134 134 impl<G: Graph> Iterator for AncestorsIterator<G> {
135 135 type Item = Result<Revision, GraphError>;
136 136
137 137 fn next(&mut self) -> Option<Self::Item> {
138 138 let current = match self.visit.peek() {
139 139 None => {
140 140 return None;
141 141 }
142 142 Some(c) => *c,
143 143 };
144 144 let [p1, p2] = match self.graph.parents(current) {
145 145 Ok(ps) => ps,
146 146 Err(e) => return Some(Err(e)),
147 147 };
148 148 if p1 < self.stoprev || !self.seen.insert(p1) {
149 149 self.visit.pop();
150 150 } else {
151 151 *(self.visit.peek_mut().unwrap()) = p1;
152 152 };
153 153
154 154 self.conditionally_push_rev(p2);
155 155 Some(Ok(current))
156 156 }
157 157 }
158 158
159 159 impl<G: Graph> MissingAncestors<G> {
160 160 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
161 161 let mut created = MissingAncestors {
162 162 graph,
163 163 bases: HashSet::new(),
164 164 max_base: NULL_REVISION,
165 165 };
166 166 created.add_bases(bases);
167 167 created
168 168 }
169 169
170 170 pub fn has_bases(&self) -> bool {
171 171 !self.bases.is_empty()
172 172 }
173 173
174 174 /// Return a reference to current bases.
175 175 ///
176 176 /// This is useful in unit tests, but also setdiscovery.py does
177 177 /// read the bases attribute of a ancestor.missingancestors instance.
178 178 pub fn get_bases(&self) -> &HashSet<Revision> {
179 179 &self.bases
180 180 }
181 181
182 182 /// Computes the relative heads of current bases.
183 183 ///
184 184 /// The object is still usable after this.
185 185 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
186 186 dagops::heads(&self.graph, self.bases.iter())
187 187 }
188 188
189 189 /// Consumes the object and returns the relative heads of its bases.
190 190 pub fn into_bases_heads(
191 191 mut self,
192 192 ) -> Result<HashSet<Revision>, GraphError> {
193 193 dagops::retain_heads(&self.graph, &mut self.bases)?;
194 194 Ok(self.bases)
195 195 }
196 196
197 197 /// Add some revisions to `self.bases`
198 198 ///
199 199 /// Takes care of keeping `self.max_base` up to date.
200 200 pub fn add_bases(
201 201 &mut self,
202 202 new_bases: impl IntoIterator<Item = Revision>,
203 203 ) {
204 204 let mut max_base = self.max_base;
205 205 self.bases.extend(
206 206 new_bases
207 207 .into_iter()
208 208 .filter(|&rev| rev != NULL_REVISION)
209 .map(|r| {
209 .inspect(|&r| {
210 210 if r > max_base {
211 211 max_base = r;
212 212 }
213 r
214 213 }),
215 214 );
216 215 self.max_base = max_base;
217 216 }
218 217
219 218 /// Remove all ancestors of self.bases from the revs set (in place)
220 219 pub fn remove_ancestors_from(
221 220 &mut self,
222 221 revs: &mut HashSet<Revision>,
223 222 ) -> Result<(), GraphError> {
224 223 revs.retain(|r| !self.bases.contains(r));
225 224 // the null revision is always an ancestor. Logically speaking
226 225 // it's debatable in case bases is empty, but the Python
227 226 // implementation always adds NULL_REVISION to bases, making it
228 227 // unconditionnally true.
229 228 revs.remove(&NULL_REVISION);
230 229 if revs.is_empty() {
231 230 return Ok(());
232 231 }
233 232 // anything in revs > start is definitely not an ancestor of bases
234 233 // revs <= start need to be investigated
235 234 if self.max_base == NULL_REVISION {
236 235 return Ok(());
237 236 }
238 237
239 238 // whatever happens, we'll keep at least keepcount of them
240 239 // knowing this gives us a earlier stop condition than
241 240 // going all the way to the root
242 241 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
243 242
244 243 let mut curr = self.max_base;
245 244 while curr != NULL_REVISION && revs.len() > keepcount {
246 245 if self.bases.contains(&curr) {
247 246 revs.remove(&curr);
248 247 self.add_parents(curr)?;
249 248 }
250 249 // We know this revision is safe because we've checked the bounds
251 250 // before.
252 251 curr = Revision(curr.0 - 1);
253 252 }
254 253 Ok(())
255 254 }
256 255
257 256 /// Add the parents of `rev` to `self.bases`
258 257 ///
259 258 /// This has no effect on `self.max_base`
260 259 #[inline]
261 260 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
262 261 if rev == NULL_REVISION {
263 262 return Ok(());
264 263 }
265 264 for p in self.graph.parents(rev)?.iter().cloned() {
266 265 // No need to bother the set with inserting NULL_REVISION over and
267 266 // over
268 267 if p != NULL_REVISION {
269 268 self.bases.insert(p);
270 269 }
271 270 }
272 271 Ok(())
273 272 }
274 273
275 274 /// Return all the ancestors of revs that are not ancestors of self.bases
276 275 ///
277 276 /// This may include elements from revs.
278 277 ///
279 278 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
280 279 /// revision number order, which is a topological order.
281 280 pub fn missing_ancestors(
282 281 &mut self,
283 282 revs: impl IntoIterator<Item = Revision>,
284 283 ) -> Result<Vec<Revision>, GraphError> {
285 284 // just for convenience and comparison with Python version
286 285 let bases_visit = &mut self.bases;
287 286 let mut revs: HashSet<Revision> = revs
288 287 .into_iter()
289 288 .filter(|r| !bases_visit.contains(r))
290 289 .collect();
291 290 let revs_visit = &mut revs;
292 291 let mut both_visit: HashSet<Revision> =
293 292 revs_visit.intersection(bases_visit).cloned().collect();
294 293 if revs_visit.is_empty() {
295 294 return Ok(Vec::new());
296 295 }
297 296 let max_revs = revs_visit.iter().cloned().max().unwrap();
298 297 let start = max(self.max_base, max_revs);
299 298
300 299 // TODO heuristics for with_capacity()?
301 300 let mut missing: Vec<Revision> = Vec::new();
302 301 for curr in (0..=start.0).rev() {
303 302 if revs_visit.is_empty() {
304 303 break;
305 304 }
306 305 if both_visit.remove(&Revision(curr)) {
307 306 // curr's parents might have made it into revs_visit through
308 307 // another path
309 308 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
310 309 if p == NULL_REVISION {
311 310 continue;
312 311 }
313 312 revs_visit.remove(&p);
314 313 bases_visit.insert(p);
315 314 both_visit.insert(p);
316 315 }
317 316 } else if revs_visit.remove(&Revision(curr)) {
318 317 missing.push(Revision(curr));
319 318 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
320 319 if p == NULL_REVISION {
321 320 continue;
322 321 }
323 322 if bases_visit.contains(&p) {
324 323 // p is already known to be an ancestor of revs_visit
325 324 revs_visit.remove(&p);
326 325 both_visit.insert(p);
327 326 } else if both_visit.contains(&p) {
328 327 // p should have been in bases_visit
329 328 revs_visit.remove(&p);
330 329 bases_visit.insert(p);
331 330 } else {
332 331 // visit later
333 332 revs_visit.insert(p);
334 333 }
335 334 }
336 335 } else if bases_visit.contains(&Revision(curr)) {
337 336 for p in self.graph.parents(Revision(curr))?.iter().cloned() {
338 337 if p == NULL_REVISION {
339 338 continue;
340 339 }
341 340 if revs_visit.remove(&p) || both_visit.contains(&p) {
342 341 // p is an ancestor of bases_visit, and is implicitly
343 342 // in revs_visit, which means p is ::revs & ::bases.
344 343 bases_visit.insert(p);
345 344 both_visit.insert(p);
346 345 } else {
347 346 bases_visit.insert(p);
348 347 }
349 348 }
350 349 }
351 350 }
352 351 missing.reverse();
353 352 Ok(missing)
354 353 }
355 354 }
356 355
357 356 #[cfg(test)]
358 357 mod tests {
359 358
360 359 use super::*;
361 360 use crate::{
362 361 testing::{SampleGraph, VecGraph},
363 362 BaseRevision,
364 363 };
365 364
366 365 impl From<BaseRevision> for Revision {
367 366 fn from(value: BaseRevision) -> Self {
368 367 if !cfg!(test) {
369 368 panic!("should only be used in tests")
370 369 }
371 370 Revision(value)
372 371 }
373 372 }
374 373
375 374 impl PartialEq<BaseRevision> for Revision {
376 375 fn eq(&self, other: &BaseRevision) -> bool {
377 376 if !cfg!(test) {
378 377 panic!("should only be used in tests")
379 378 }
380 379 self.0.eq(other)
381 380 }
382 381 }
383 382
384 383 impl PartialEq<u32> for Revision {
385 384 fn eq(&self, other: &u32) -> bool {
386 385 if !cfg!(test) {
387 386 panic!("should only be used in tests")
388 387 }
389 388 let check: Result<u32, _> = self.0.try_into();
390 389 match check {
391 390 Ok(value) => value.eq(other),
392 391 Err(_) => false,
393 392 }
394 393 }
395 394 }
396 395
397 396 fn list_ancestors<G: Graph>(
398 397 graph: G,
399 398 initrevs: Vec<Revision>,
400 399 stoprev: Revision,
401 400 inclusive: bool,
402 401 ) -> Vec<Revision> {
403 402 AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
404 403 .unwrap()
405 404 .map(|res| res.unwrap())
406 405 .collect()
407 406 }
408 407
409 408 #[test]
410 409 /// Same tests as test-ancestor.py, without membership
411 410 /// (see also test-ancestor.py.out)
412 411 fn test_list_ancestor() {
413 412 assert_eq!(
414 413 list_ancestors(SampleGraph, vec![], 0.into(), false),
415 414 Vec::<Revision>::new()
416 415 );
417 416 assert_eq!(
418 417 list_ancestors(
419 418 SampleGraph,
420 419 vec![11.into(), 13.into()],
421 420 0.into(),
422 421 false
423 422 ),
424 423 vec![8, 7, 4, 3, 2, 1, 0]
425 424 );
426 425 // it works as well on references, because &Graph implements Graph
427 426 // this is needed as of this writing by RHGitaly
428 427 assert_eq!(
429 428 list_ancestors(
430 429 &SampleGraph,
431 430 vec![11.into(), 13.into()],
432 431 0.into(),
433 432 false
434 433 ),
435 434 vec![8, 7, 4, 3, 2, 1, 0]
436 435 );
437 436
438 437 assert_eq!(
439 438 list_ancestors(
440 439 SampleGraph,
441 440 vec![1.into(), 3.into()],
442 441 0.into(),
443 442 false
444 443 ),
445 444 vec![1, 0]
446 445 );
447 446 assert_eq!(
448 447 list_ancestors(
449 448 SampleGraph,
450 449 vec![11.into(), 13.into()],
451 450 0.into(),
452 451 true
453 452 ),
454 453 vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
455 454 );
456 455 assert_eq!(
457 456 list_ancestors(
458 457 SampleGraph,
459 458 vec![11.into(), 13.into()],
460 459 6.into(),
461 460 false
462 461 ),
463 462 vec![8, 7]
464 463 );
465 464 assert_eq!(
466 465 list_ancestors(
467 466 SampleGraph,
468 467 vec![11.into(), 13.into()],
469 468 6.into(),
470 469 true
471 470 ),
472 471 vec![13, 11, 8, 7]
473 472 );
474 473 assert_eq!(
475 474 list_ancestors(
476 475 SampleGraph,
477 476 vec![11.into(), 13.into()],
478 477 11.into(),
479 478 true
480 479 ),
481 480 vec![13, 11]
482 481 );
483 482 assert_eq!(
484 483 list_ancestors(
485 484 SampleGraph,
486 485 vec![11.into(), 13.into()],
487 486 12.into(),
488 487 true
489 488 ),
490 489 vec![13]
491 490 );
492 491 assert_eq!(
493 492 list_ancestors(
494 493 SampleGraph,
495 494 vec![10.into(), 1.into()],
496 495 0.into(),
497 496 true
498 497 ),
499 498 vec![10, 5, 4, 2, 1, 0]
500 499 );
501 500 }
502 501
503 502 #[test]
504 503 /// Corner case that's not directly in test-ancestors.py, but
505 504 /// that happens quite often, as demonstrated by running the whole
506 505 /// suite.
507 506 /// For instance, run tests/test-obsolete-checkheads.t
508 507 fn test_nullrev_input() {
509 508 let mut iter = AncestorsIterator::new(
510 509 SampleGraph,
511 510 vec![Revision(-1)],
512 511 0.into(),
513 512 false,
514 513 )
515 514 .unwrap();
516 515 assert_eq!(iter.next(), None)
517 516 }
518 517
519 518 #[test]
520 519 fn test_contains() {
521 520 let mut lazy = AncestorsIterator::new(
522 521 SampleGraph,
523 522 vec![10.into(), 1.into()],
524 523 0.into(),
525 524 true,
526 525 )
527 526 .unwrap();
528 527 assert!(lazy.contains(1.into()).unwrap());
529 528 assert!(!lazy.contains(3.into()).unwrap());
530 529
531 530 let mut lazy = AncestorsIterator::new(
532 531 SampleGraph,
533 532 vec![0.into()],
534 533 0.into(),
535 534 false,
536 535 )
537 536 .unwrap();
538 537 assert!(!lazy.contains(NULL_REVISION).unwrap());
539 538 }
540 539
541 540 #[test]
542 541 fn test_peek() {
543 542 let mut iter = AncestorsIterator::new(
544 543 SampleGraph,
545 544 vec![10.into()],
546 545 0.into(),
547 546 true,
548 547 )
549 548 .unwrap();
550 549 // peek() gives us the next value
551 550 assert_eq!(iter.peek(), Some(10.into()));
552 551 // but it's not been consumed
553 552 assert_eq!(iter.next(), Some(Ok(10.into())));
554 553 // and iteration resumes normally
555 554 assert_eq!(iter.next(), Some(Ok(5.into())));
556 555
557 556 // let's drain the iterator to test peek() at the end
558 557 while iter.next().is_some() {}
559 558 assert_eq!(iter.peek(), None);
560 559 }
561 560
562 561 #[test]
563 562 fn test_empty() {
564 563 let mut iter = AncestorsIterator::new(
565 564 SampleGraph,
566 565 vec![10.into()],
567 566 0.into(),
568 567 true,
569 568 )
570 569 .unwrap();
571 570 assert!(!iter.is_empty());
572 571 while iter.next().is_some() {}
573 572 assert!(!iter.is_empty());
574 573
575 574 let iter = AncestorsIterator::new(SampleGraph, vec![], 0.into(), true)
576 575 .unwrap();
577 576 assert!(iter.is_empty());
578 577
579 578 // case where iter.seen == {NULL_REVISION}
580 579 let iter = AncestorsIterator::new(
581 580 SampleGraph,
582 581 vec![0.into()],
583 582 0.into(),
584 583 false,
585 584 )
586 585 .unwrap();
587 586 assert!(iter.is_empty());
588 587 }
589 588
590 589 /// A corrupted Graph, supporting error handling tests
591 590 #[derive(Clone, Debug)]
592 591 struct Corrupted;
593 592
594 593 impl Graph for Corrupted {
595 594 // FIXME what to do about this? Are we just not supposed to get them
596 595 // anymore?
597 596 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
598 597 match rev {
599 598 Revision(1) => Ok([0.into(), (-1).into()]),
600 599 r => Err(GraphError::ParentOutOfRange(r)),
601 600 }
602 601 }
603 602 }
604 603
605 604 #[test]
606 605 fn test_initrev_out_of_range() {
607 606 // inclusive=false looks up initrev's parents right away
608 607 match AncestorsIterator::new(
609 608 SampleGraph,
610 609 vec![25.into()],
611 610 0.into(),
612 611 false,
613 612 ) {
614 613 Ok(_) => panic!("Should have been ParentOutOfRange"),
615 614 Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25.into())),
616 615 }
617 616 }
618 617
619 618 #[test]
620 619 fn test_next_out_of_range() {
621 620 // inclusive=false looks up initrev's parents right away
622 621 let mut iter =
623 622 AncestorsIterator::new(Corrupted, vec![1.into()], 0.into(), false)
624 623 .unwrap();
625 624 assert_eq!(
626 625 iter.next(),
627 626 Some(Err(GraphError::ParentOutOfRange(0.into())))
628 627 );
629 628 }
630 629
631 630 #[test]
632 631 /// Test constructor, add/get bases and heads
633 632 fn test_missing_bases() -> Result<(), GraphError> {
634 633 let mut missing_ancestors = MissingAncestors::new(
635 634 SampleGraph,
636 635 [5.into(), 3.into(), 1.into(), 3.into()].iter().cloned(),
637 636 );
638 637 let mut as_vec: Vec<Revision> =
639 638 missing_ancestors.get_bases().iter().cloned().collect();
640 639 as_vec.sort_unstable();
641 640 assert_eq!(as_vec, [1, 3, 5]);
642 641 assert_eq!(missing_ancestors.max_base, 5);
643 642
644 643 missing_ancestors
645 644 .add_bases([3.into(), 7.into(), 8.into()].iter().cloned());
646 645 as_vec = missing_ancestors.get_bases().iter().cloned().collect();
647 646 as_vec.sort_unstable();
648 647 assert_eq!(as_vec, [1, 3, 5, 7, 8]);
649 648 assert_eq!(missing_ancestors.max_base, 8);
650 649
651 650 as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
652 651 as_vec.sort_unstable();
653 652 assert_eq!(as_vec, [3, 5, 7, 8]);
654 653 Ok(())
655 654 }
656 655
657 656 fn assert_missing_remove(
658 657 bases: &[BaseRevision],
659 658 revs: &[BaseRevision],
660 659 expected: &[BaseRevision],
661 660 ) {
662 661 let mut missing_ancestors = MissingAncestors::new(
663 662 SampleGraph,
664 663 bases.iter().map(|r| Revision(*r)),
665 664 );
666 665 let mut revset: HashSet<Revision> =
667 666 revs.iter().map(|r| Revision(*r)).collect();
668 667 missing_ancestors
669 668 .remove_ancestors_from(&mut revset)
670 669 .unwrap();
671 670 let mut as_vec: Vec<Revision> = revset.into_iter().collect();
672 671 as_vec.sort_unstable();
673 672 assert_eq!(as_vec.as_slice(), expected);
674 673 }
675 674
676 675 #[test]
677 676 fn test_missing_remove() {
678 677 assert_missing_remove(
679 678 &[1, 2, 3, 4, 7],
680 679 Vec::from_iter(1..10).as_slice(),
681 680 &[5, 6, 8, 9],
682 681 );
683 682 assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
684 683 assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
685 684 }
686 685
687 686 fn assert_missing_ancestors(
688 687 bases: &[BaseRevision],
689 688 revs: &[BaseRevision],
690 689 expected: &[BaseRevision],
691 690 ) {
692 691 let mut missing_ancestors = MissingAncestors::new(
693 692 SampleGraph,
694 693 bases.iter().map(|r| Revision(*r)),
695 694 );
696 695 let missing = missing_ancestors
697 696 .missing_ancestors(revs.iter().map(|r| Revision(*r)))
698 697 .unwrap();
699 698 assert_eq!(missing.as_slice(), expected);
700 699 }
701 700
702 701 #[test]
703 702 fn test_missing_ancestors() {
704 703 // examples taken from test-ancestors.py by having it run
705 704 // on the same graph (both naive and fast Python algs)
706 705 assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
707 706 assert_missing_ancestors(&[11], &[10], &[5, 10]);
708 707 assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
709 708 }
710 709
711 710 /// An interesting case found by a random generator similar to
712 711 /// the one in test-ancestor.py. An early version of Rust MissingAncestors
713 712 /// failed this, yet none of the integration tests of the whole suite
714 713 /// catched it.
715 714 #[allow(clippy::unnecessary_cast)]
716 715 #[test]
717 716 fn test_remove_ancestors_from_case1() {
718 717 const FAKE_NULL_REVISION: BaseRevision = -1;
719 718 assert_eq!(FAKE_NULL_REVISION, NULL_REVISION.0);
720 719 let graph: VecGraph = vec![
721 720 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
722 721 [0, FAKE_NULL_REVISION],
723 722 [1, 0],
724 723 [2, 1],
725 724 [3, FAKE_NULL_REVISION],
726 725 [4, FAKE_NULL_REVISION],
727 726 [5, 1],
728 727 [2, FAKE_NULL_REVISION],
729 728 [7, FAKE_NULL_REVISION],
730 729 [8, FAKE_NULL_REVISION],
731 730 [9, FAKE_NULL_REVISION],
732 731 [10, 1],
733 732 [3, FAKE_NULL_REVISION],
734 733 [12, FAKE_NULL_REVISION],
735 734 [13, FAKE_NULL_REVISION],
736 735 [14, FAKE_NULL_REVISION],
737 736 [4, FAKE_NULL_REVISION],
738 737 [16, FAKE_NULL_REVISION],
739 738 [17, FAKE_NULL_REVISION],
740 739 [18, FAKE_NULL_REVISION],
741 740 [19, 11],
742 741 [20, FAKE_NULL_REVISION],
743 742 [21, FAKE_NULL_REVISION],
744 743 [22, FAKE_NULL_REVISION],
745 744 [23, FAKE_NULL_REVISION],
746 745 [2, FAKE_NULL_REVISION],
747 746 [3, FAKE_NULL_REVISION],
748 747 [26, 24],
749 748 [27, FAKE_NULL_REVISION],
750 749 [28, FAKE_NULL_REVISION],
751 750 [12, FAKE_NULL_REVISION],
752 751 [1, FAKE_NULL_REVISION],
753 752 [1, 9],
754 753 [32, FAKE_NULL_REVISION],
755 754 [33, FAKE_NULL_REVISION],
756 755 [34, 31],
757 756 [35, FAKE_NULL_REVISION],
758 757 [36, 26],
759 758 [37, FAKE_NULL_REVISION],
760 759 [38, FAKE_NULL_REVISION],
761 760 [39, FAKE_NULL_REVISION],
762 761 [40, FAKE_NULL_REVISION],
763 762 [41, FAKE_NULL_REVISION],
764 763 [42, 26],
765 764 [0, FAKE_NULL_REVISION],
766 765 [44, FAKE_NULL_REVISION],
767 766 [45, 4],
768 767 [40, FAKE_NULL_REVISION],
769 768 [47, FAKE_NULL_REVISION],
770 769 [36, 0],
771 770 [49, FAKE_NULL_REVISION],
772 771 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
773 772 [51, FAKE_NULL_REVISION],
774 773 [52, FAKE_NULL_REVISION],
775 774 [53, FAKE_NULL_REVISION],
776 775 [14, FAKE_NULL_REVISION],
777 776 [55, FAKE_NULL_REVISION],
778 777 [15, FAKE_NULL_REVISION],
779 778 [23, FAKE_NULL_REVISION],
780 779 [58, FAKE_NULL_REVISION],
781 780 [59, FAKE_NULL_REVISION],
782 781 [2, FAKE_NULL_REVISION],
783 782 [61, 59],
784 783 [62, FAKE_NULL_REVISION],
785 784 [63, FAKE_NULL_REVISION],
786 785 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
787 786 [65, FAKE_NULL_REVISION],
788 787 [66, FAKE_NULL_REVISION],
789 788 [67, FAKE_NULL_REVISION],
790 789 [68, FAKE_NULL_REVISION],
791 790 [37, 28],
792 791 [69, 25],
793 792 [71, FAKE_NULL_REVISION],
794 793 [72, FAKE_NULL_REVISION],
795 794 [50, 2],
796 795 [74, FAKE_NULL_REVISION],
797 796 [12, FAKE_NULL_REVISION],
798 797 [18, FAKE_NULL_REVISION],
799 798 [77, FAKE_NULL_REVISION],
800 799 [78, FAKE_NULL_REVISION],
801 800 [79, FAKE_NULL_REVISION],
802 801 [43, 33],
803 802 [81, FAKE_NULL_REVISION],
804 803 [82, FAKE_NULL_REVISION],
805 804 [83, FAKE_NULL_REVISION],
806 805 [84, 45],
807 806 [85, FAKE_NULL_REVISION],
808 807 [86, FAKE_NULL_REVISION],
809 808 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
810 809 [88, FAKE_NULL_REVISION],
811 810 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
812 811 [76, 83],
813 812 [44, FAKE_NULL_REVISION],
814 813 [92, FAKE_NULL_REVISION],
815 814 [93, FAKE_NULL_REVISION],
816 815 [9, FAKE_NULL_REVISION],
817 816 [95, 67],
818 817 [96, FAKE_NULL_REVISION],
819 818 [97, FAKE_NULL_REVISION],
820 819 [FAKE_NULL_REVISION, FAKE_NULL_REVISION],
821 820 ]
822 821 .into_iter()
823 822 .map(|[a, b]| [Revision(a), Revision(b)])
824 823 .collect();
825 824 let problem_rev = 28.into();
826 825 let problem_base = 70.into();
827 826 // making the problem obvious: problem_rev is a parent of problem_base
828 827 assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);
829 828
830 829 let mut missing_ancestors: MissingAncestors<VecGraph> =
831 830 MissingAncestors::new(
832 831 graph,
833 832 [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
834 833 .iter()
835 834 .map(|r| Revision(*r)),
836 835 );
837 836 assert!(missing_ancestors.bases.contains(&problem_base));
838 837
839 838 let mut revs: HashSet<Revision> =
840 839 [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
841 840 .iter()
842 841 .map(|r| Revision(*r))
843 842 .collect();
844 843 missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
845 844 assert!(!revs.contains(&problem_rev));
846 845 }
847 846 }
@@ -1,250 +1,250
1 1 use crate::errors::HgError;
2 2 use crate::exit_codes;
3 3 use crate::repo::Repo;
4 4 use crate::revlog::path_encode::path_encode;
5 5 use crate::revlog::NodePrefix;
6 6 use crate::revlog::Revision;
7 7 use crate::revlog::RevlogEntry;
8 8 use crate::revlog::{Revlog, RevlogError};
9 9 use crate::utils::files::get_path_from_bytes;
10 10 use crate::utils::hg_path::HgPath;
11 11 use crate::utils::SliceExt;
12 12 use crate::Graph;
13 13 use crate::GraphError;
14 14 use crate::UncheckedRevision;
15 15 use std::path::PathBuf;
16 16
17 17 use super::options::RevlogOpenOptions;
18 18
19 19 /// A specialized `Revlog` to work with file data logs.
20 20 pub struct Filelog {
21 21 /// The generic `revlog` format.
22 22 revlog: Revlog,
23 23 }
24 24
25 25 impl Graph for Filelog {
26 26 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
27 27 self.revlog.parents(rev)
28 28 }
29 29 }
30 30
31 31 impl Filelog {
32 32 pub fn open_vfs(
33 33 store_vfs: &crate::vfs::VfsImpl,
34 34 file_path: &HgPath,
35 35 options: RevlogOpenOptions,
36 36 ) -> Result<Self, HgError> {
37 37 let index_path = store_path(file_path, b".i");
38 38 let data_path = store_path(file_path, b".d");
39 39 let revlog =
40 40 Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
41 41 Ok(Self { revlog })
42 42 }
43 43
44 44 pub fn open(
45 45 repo: &Repo,
46 46 file_path: &HgPath,
47 47 options: RevlogOpenOptions,
48 48 ) -> Result<Self, HgError> {
49 49 Self::open_vfs(&repo.store_vfs(), file_path, options)
50 50 }
51 51
52 52 /// The given node ID is that of the file as found in a filelog, not of a
53 53 /// changeset.
54 54 pub fn data_for_node(
55 55 &self,
56 56 file_node: impl Into<NodePrefix>,
57 57 ) -> Result<FilelogRevisionData, RevlogError> {
58 58 let file_rev = self.revlog.rev_from_node(file_node.into())?;
59 59 self.data_for_unchecked_rev(file_rev.into())
60 60 }
61 61
62 62 /// The given revision is that of the file as found in a filelog, not of a
63 63 /// changeset.
64 64 pub fn data_for_unchecked_rev(
65 65 &self,
66 66 file_rev: UncheckedRevision,
67 67 ) -> Result<FilelogRevisionData, RevlogError> {
68 68 let data: Vec<u8> = self
69 69 .revlog
70 70 .get_data_for_unchecked_rev(file_rev)?
71 71 .into_owned();
72 72 Ok(FilelogRevisionData(data))
73 73 }
74 74
75 75 /// The given node ID is that of the file as found in a filelog, not of a
76 76 /// changeset.
77 77 pub fn entry_for_node(
78 78 &self,
79 79 file_node: impl Into<NodePrefix>,
80 80 ) -> Result<FilelogEntry, RevlogError> {
81 81 let file_rev = self.revlog.rev_from_node(file_node.into())?;
82 82 self.entry(file_rev)
83 83 }
84 84
85 85 /// The given revision is that of the file as found in a filelog, not of a
86 86 /// changeset.
87 87 pub fn entry_for_unchecked_rev(
88 88 &self,
89 89 file_rev: UncheckedRevision,
90 90 ) -> Result<FilelogEntry, RevlogError> {
91 91 Ok(FilelogEntry(
92 92 self.revlog.get_entry_for_unchecked_rev(file_rev)?,
93 93 ))
94 94 }
95 95
96 96 /// Same as [`Self::entry_for_unchecked_rev`] for a checked revision.
97 97 pub fn entry(
98 98 &self,
99 99 file_rev: Revision,
100 100 ) -> Result<FilelogEntry, RevlogError> {
101 101 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
102 102 }
103 103 }
104 104
105 105 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
106 106 let encoded_bytes =
107 107 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
108 108 get_path_from_bytes(&encoded_bytes).into()
109 109 }
110 110
111 111 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
112 112
113 113 impl FilelogEntry<'_> {
114 114 /// `self.data()` can be expensive, with decompression and delta
115 115 /// resolution.
116 116 ///
117 117 /// *Without* paying this cost, based on revlog index information
118 118 /// including `RevlogEntry::uncompressed_len`:
119 119 ///
120 120 /// * Returns `true` if the length that `self.data().file_data().len()`
121 121 /// would return is definitely **not equal** to `other_len`.
122 122 /// * Returns `false` if available information is inconclusive.
123 123 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
124 124 // Relevant code that implement this behavior in Python code:
125 125 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
126 126 // revlog.size, revlog.rawsize
127 127
128 128 // Let’s call `file_data_len` what would be returned by
129 129 // `self.data().file_data().len()`.
130 130
131 131 if self.0.is_censored() {
132 132 let file_data_len = 0;
133 133 return other_len != file_data_len;
134 134 }
135 135
136 136 if self.0.has_length_affecting_flag_processor() {
137 137 // We can’t conclude anything about `file_data_len`.
138 138 return false;
139 139 }
140 140
141 141 // Revlog revisions (usually) have metadata for the size of
142 142 // their data after decompression and delta resolution
143 143 // as would be returned by `Revlog::get_rev_data`.
144 144 //
145 145 // For filelogs this is the file’s contents preceded by an optional
146 146 // metadata block.
147 147 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
148 148 l as u64
149 149 } else {
150 150 // The field was set to -1, the actual uncompressed len is unknown.
151 151 // We need to decompress to say more.
152 152 return false;
153 153 };
154 154 // `uncompressed_len = file_data_len + optional_metadata_len`,
155 155 // so `file_data_len <= uncompressed_len`.
156 156 if uncompressed_len < other_len {
157 157 // Transitively, `file_data_len < other_len`.
158 158 // So `other_len != file_data_len` definitely.
159 159 return true;
160 160 }
161 161
162 162 if uncompressed_len == other_len + 4 {
163 163 // It’s possible that `file_data_len == other_len` with an empty
164 164 // metadata block (2 start marker bytes + 2 end marker bytes).
165 165 // This happens when there wouldn’t otherwise be metadata, but
166 166 // the first 2 bytes of file data happen to match a start marker
167 167 // and would be ambiguous.
168 168 return false;
169 169 }
170 170
171 171 if !self.0.has_p1() {
172 172 // There may or may not be copy metadata, so we can’t deduce more
173 173 // about `file_data_len` without computing file data.
174 174 return false;
175 175 }
176 176
177 177 // Filelog ancestry is not meaningful in the way changelog ancestry is.
178 178 // It only provides hints to delta generation.
179 179 // p1 and p2 are set to null when making a copy or rename since
180 180 // contents are likely unrelatedto what might have previously existed
181 181 // at the destination path.
182 182 //
183 183 // Conversely, since here p1 is non-null, there is no copy metadata.
184 184 // Note that this reasoning may be invalidated in the presence of
185 185 // merges made by some previous versions of Mercurial that
186 186 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
187 187 // and `tests/test-issue6528.t`.
188 188 //
189 189 // Since copy metadata is currently the only kind of metadata
190 190 // kept in revlog data of filelogs,
191 191 // this `FilelogEntry` does not have such metadata:
192 192 let file_data_len = uncompressed_len;
193 193
194 194 file_data_len != other_len
195 195 }
196 196
197 197 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
198 198 let data = self.0.data();
199 199 match data {
200 200 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
201 201 // Errors other than `HgError` should not happen at this point
202 202 Err(e) => match e {
203 203 RevlogError::Other(hg_error) => Err(hg_error),
204 204 revlog_error => Err(HgError::abort(
205 205 revlog_error.to_string(),
206 206 exit_codes::ABORT,
207 207 None,
208 208 )),
209 209 },
210 210 }
211 211 }
212 212 }
213 213
214 214 /// The data for one revision in a filelog, uncompressed and delta-resolved.
215 215 pub struct FilelogRevisionData(Vec<u8>);
216 216
217 217 impl FilelogRevisionData {
218 218 /// Split into metadata and data
219 219 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
220 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
220 const DELIMITER: &[u8; 2] = b"\x01\n";
221 221
222 222 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
223 223 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
224 224 Ok((Some(metadata), data))
225 225 } else {
226 226 Err(HgError::corrupted(
227 227 "Missing metadata end delimiter in filelog entry",
228 228 ))
229 229 }
230 230 } else {
231 231 Ok((None, &self.0))
232 232 }
233 233 }
234 234
235 235 /// Returns the file contents at this revision, stripped of any metadata
236 236 pub fn file_data(&self) -> Result<&[u8], HgError> {
237 237 let (_metadata, data) = self.split()?;
238 238 Ok(data)
239 239 }
240 240
241 241 /// Consume the entry, and convert it into data, discarding any metadata,
242 242 /// if present.
243 243 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
244 244 if let (Some(_metadata), data) = self.split()? {
245 245 Ok(data.to_owned())
246 246 } else {
247 247 Ok(self.0)
248 248 }
249 249 }
250 250 }
@@ -1,1351 +1,1350
1 1 //! A layer of lower-level revlog functionality to encapsulate most of the
2 2 //! IO work and expensive operations.
3 3 use std::{
4 4 borrow::Cow,
5 5 cell::RefCell,
6 6 io::{ErrorKind, Seek, SeekFrom, Write},
7 7 ops::Deref,
8 8 path::PathBuf,
9 9 sync::{Arc, Mutex},
10 10 };
11 11
12 12 use schnellru::{ByMemoryUsage, LruMap};
13 13 use sha1::{Digest, Sha1};
14 14
15 15 use crate::{
16 16 errors::{HgError, IoResultExt},
17 17 exit_codes,
18 18 transaction::Transaction,
19 19 vfs::Vfs,
20 20 };
21 21
22 22 use super::{
23 23 compression::{
24 24 uncompressed_zstd_data, CompressionConfig, Compressor, NoneCompressor,
25 25 ZlibCompressor, ZstdCompressor, ZLIB_BYTE, ZSTD_BYTE,
26 26 },
27 27 file_io::{DelayedBuffer, FileHandle, RandomAccessFile, WriteHandles},
28 28 index::{Index, IndexHeader, INDEX_ENTRY_SIZE},
29 29 node::{NODE_BYTES_LENGTH, NULL_NODE},
30 30 options::{RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig},
31 31 BaseRevision, Node, Revision, RevlogEntry, RevlogError, RevlogIndex,
32 32 UncheckedRevision, NULL_REVISION, NULL_REVLOG_ENTRY_FLAGS,
33 33 };
34 34
35 35 /// Matches the `_InnerRevlog` class in the Python code, as an arbitrary
36 36 /// boundary to incrementally rewrite higher-level revlog functionality in
37 37 /// Rust.
38 38 pub struct InnerRevlog {
39 39 /// When index and data are not interleaved: bytes of the revlog index.
40 40 /// When index and data are interleaved (inline revlog): bytes of the
41 41 /// revlog index and data.
42 42 pub index: Index,
43 43 /// The store vfs that is used to interact with the filesystem
44 44 vfs: Box<dyn Vfs>,
45 45 /// The index file path, relative to the vfs root
46 46 pub index_file: PathBuf,
47 47 /// The data file path, relative to the vfs root (same as `index_file`
48 48 /// if inline)
49 49 data_file: PathBuf,
50 50 /// Data config that applies to this revlog
51 51 data_config: RevlogDataConfig,
52 52 /// Delta config that applies to this revlog
53 53 delta_config: RevlogDeltaConfig,
54 54 /// Feature config that applies to this revlog
55 55 feature_config: RevlogFeatureConfig,
56 56 /// A view into this revlog's data file
57 57 segment_file: RandomAccessFile,
58 58 /// A cache of uncompressed chunks that have previously been restored.
59 59 /// Its eviction policy is defined in [`Self::new`].
60 60 uncompressed_chunk_cache: Option<UncompressedChunkCache>,
61 61 /// Used to keep track of the actual target during diverted writes
62 62 /// for the changelog
63 63 original_index_file: Option<PathBuf>,
64 64 /// Write handles to the index and data files
65 65 /// XXX why duplicate from `index` and `segment_file`?
66 66 writing_handles: Option<WriteHandles>,
67 67 /// See [`DelayedBuffer`].
68 68 delayed_buffer: Option<Arc<Mutex<DelayedBuffer>>>,
69 69 /// Whether this revlog is inline. XXX why duplicate from `index`?
70 70 pub inline: bool,
71 71 /// A cache of the last revision, which is usually accessed multiple
72 72 /// times.
73 73 pub last_revision_cache: Mutex<Option<SingleRevisionCache>>,
74 74 }
75 75
76 76 impl InnerRevlog {
77 77 pub fn new(
78 78 vfs: Box<dyn Vfs>,
79 79 index: Index,
80 80 index_file: PathBuf,
81 81 data_file: PathBuf,
82 82 data_config: RevlogDataConfig,
83 83 delta_config: RevlogDeltaConfig,
84 84 feature_config: RevlogFeatureConfig,
85 85 ) -> Self {
86 86 assert!(index_file.is_relative());
87 87 assert!(data_file.is_relative());
88 88 let segment_file = RandomAccessFile::new(
89 89 dyn_clone::clone_box(&*vfs),
90 90 if index.is_inline() {
91 91 index_file.to_owned()
92 92 } else {
93 93 data_file.to_owned()
94 94 },
95 95 );
96 96
97 97 let uncompressed_chunk_cache =
98 98 data_config.uncompressed_cache_factor.map(
99 99 // Arbitrary initial value
100 100 // TODO check if using a hasher specific to integers is useful
101 101 |_factor| RefCell::new(LruMap::with_memory_budget(65536)),
102 102 );
103 103
104 104 let inline = index.is_inline();
105 105 Self {
106 106 index,
107 107 vfs,
108 108 index_file,
109 109 data_file,
110 110 data_config,
111 111 delta_config,
112 112 feature_config,
113 113 segment_file,
114 114 uncompressed_chunk_cache,
115 115 original_index_file: None,
116 116 writing_handles: None,
117 117 delayed_buffer: None,
118 118 inline,
119 119 last_revision_cache: Mutex::new(None),
120 120 }
121 121 }
122 122
123 123 /// Return number of entries of the revlog index
124 124 pub fn len(&self) -> usize {
125 125 self.index.len()
126 126 }
127 127
128 128 /// Return `true` if this revlog has no entries
129 129 pub fn is_empty(&self) -> bool {
130 130 self.len() == 0
131 131 }
132 132
133 133 /// Return whether this revlog is inline (mixed index and data)
134 134 pub fn is_inline(&self) -> bool {
135 135 self.inline
136 136 }
137 137
138 138 /// Clear all caches from this revlog
139 139 pub fn clear_cache(&mut self) {
140 140 assert!(!self.is_delaying());
141 141 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
142 142 // We don't clear the allocation here because it's probably faster.
143 143 // We could change our minds later if this ends up being a problem
144 144 // with regards to memory consumption.
145 145 cache.borrow_mut().clear();
146 146 }
147 147 }
148 148
149 149 /// Return an entry for the null revision
150 150 pub fn make_null_entry(&self) -> RevlogEntry {
151 151 RevlogEntry {
152 152 revlog: self,
153 153 rev: NULL_REVISION,
154 154 uncompressed_len: 0,
155 155 p1: NULL_REVISION,
156 156 p2: NULL_REVISION,
157 157 flags: NULL_REVLOG_ENTRY_FLAGS,
158 158 hash: NULL_NODE,
159 159 }
160 160 }
161 161
162 162 /// Return the [`RevlogEntry`] for a [`Revision`] that is known to exist
163 163 pub fn get_entry(
164 164 &self,
165 165 rev: Revision,
166 166 ) -> Result<RevlogEntry, RevlogError> {
167 167 if rev == NULL_REVISION {
168 168 return Ok(self.make_null_entry());
169 169 }
170 170 let index_entry = self
171 171 .index
172 172 .get_entry(rev)
173 173 .ok_or_else(|| RevlogError::InvalidRevision(rev.to_string()))?;
174 174 let p1 =
175 175 self.index.check_revision(index_entry.p1()).ok_or_else(|| {
176 176 RevlogError::corrupted(format!(
177 177 "p1 for rev {} is invalid",
178 178 rev
179 179 ))
180 180 })?;
181 181 let p2 =
182 182 self.index.check_revision(index_entry.p2()).ok_or_else(|| {
183 183 RevlogError::corrupted(format!(
184 184 "p2 for rev {} is invalid",
185 185 rev
186 186 ))
187 187 })?;
188 188 let entry = RevlogEntry {
189 189 revlog: self,
190 190 rev,
191 191 uncompressed_len: index_entry.uncompressed_len(),
192 192 p1,
193 193 p2,
194 194 flags: index_entry.flags(),
195 195 hash: *index_entry.hash(),
196 196 };
197 197 Ok(entry)
198 198 }
199 199
200 200 /// Return the [`RevlogEntry`] for `rev`. If `rev` fails to check, this
201 201 /// returns a [`RevlogError`].
202 202 pub fn get_entry_for_unchecked_rev(
203 203 &self,
204 204 rev: UncheckedRevision,
205 205 ) -> Result<RevlogEntry, RevlogError> {
206 206 if rev == NULL_REVISION.into() {
207 207 return Ok(self.make_null_entry());
208 208 }
209 209 let rev = self.index.check_revision(rev).ok_or_else(|| {
210 210 RevlogError::corrupted(format!("rev {} is invalid", rev))
211 211 })?;
212 212 self.get_entry(rev)
213 213 }
214 214
215 215 /// Is the revlog currently delaying the visibility of written data?
216 216 ///
217 217 /// The delaying mechanism can be either in-memory or written on disk in a
218 218 /// side-file.
219 219 pub fn is_delaying(&self) -> bool {
220 220 self.delayed_buffer.is_some() || self.original_index_file.is_some()
221 221 }
222 222
223 223 /// The offset of the data chunk for this revision
224 224 #[inline(always)]
225 225 pub fn start(&self, rev: Revision) -> usize {
226 226 self.index.start(
227 227 rev,
228 228 &self
229 229 .index
230 230 .get_entry(rev)
231 231 .unwrap_or_else(|| self.index.make_null_entry()),
232 232 )
233 233 }
234 234
235 235 /// The length of the data chunk for this revision
236 236 /// TODO rename this method and others to more explicit names than the
237 237 /// existing ones that were copied over from Python
238 238 #[inline(always)]
239 239 pub fn length(&self, rev: Revision) -> usize {
240 240 self.index
241 241 .get_entry(rev)
242 242 .unwrap_or_else(|| self.index.make_null_entry())
243 243 .compressed_len() as usize
244 244 }
245 245
246 246 /// The end of the data chunk for this revision
247 247 #[inline(always)]
248 248 pub fn end(&self, rev: Revision) -> usize {
249 249 self.start(rev) + self.length(rev)
250 250 }
251 251
252 252 /// Return the delta parent of the given revision
253 253 pub fn delta_parent(&self, rev: Revision) -> Revision {
254 254 let base = self
255 255 .index
256 256 .get_entry(rev)
257 257 .unwrap()
258 258 .base_revision_or_base_of_delta_chain();
259 259 if base.0 == rev.0 {
260 260 NULL_REVISION
261 261 } else if self.delta_config.general_delta {
262 262 Revision(base.0)
263 263 } else {
264 264 Revision(rev.0 - 1)
265 265 }
266 266 }
267 267
268 268 /// Return whether `rev` points to a snapshot revision (i.e. does not have
269 269 /// a delta base).
270 270 pub fn is_snapshot(&self, rev: Revision) -> Result<bool, RevlogError> {
271 271 if !self.delta_config.sparse_revlog {
272 272 return Ok(self.delta_parent(rev) == NULL_REVISION);
273 273 }
274 274 self.index.is_snapshot_unchecked(rev)
275 275 }
276 276
277 277 /// Return the delta chain for `rev` according to this revlog's config.
278 278 /// See [`Index::delta_chain`] for more information.
279 279 pub fn delta_chain(
280 280 &self,
281 281 rev: Revision,
282 282 stop_rev: Option<Revision>,
283 283 ) -> Result<(Vec<Revision>, bool), HgError> {
284 284 self.index.delta_chain(
285 285 rev,
286 286 stop_rev,
287 287 self.delta_config.general_delta.into(),
288 288 )
289 289 }
290 290
291 291 fn compressor(&self) -> Result<Box<dyn Compressor>, HgError> {
292 292 // TODO cache the compressor?
293 293 Ok(match self.feature_config.compression_engine {
294 294 CompressionConfig::Zlib { level } => {
295 295 Box::new(ZlibCompressor::new(level))
296 296 }
297 297 CompressionConfig::Zstd { level, threads } => {
298 298 Box::new(ZstdCompressor::new(level, threads))
299 299 }
300 300 CompressionConfig::None => Box::new(NoneCompressor),
301 301 })
302 302 }
303 303
304 304 /// Generate a possibly-compressed representation of data.
305 305 /// Returns `None` if the data was not compressed.
306 306 pub fn compress<'data>(
307 307 &self,
308 308 data: &'data [u8],
309 309 ) -> Result<Option<Cow<'data, [u8]>>, RevlogError> {
310 310 if data.is_empty() {
311 311 return Ok(Some(data.into()));
312 312 }
313 313 let res = self.compressor()?.compress(data)?;
314 314 if let Some(compressed) = res {
315 315 // The revlog compressor added the header in the returned data.
316 316 return Ok(Some(compressed.into()));
317 317 }
318 318
319 319 if data[0] == b'\0' {
320 320 return Ok(Some(data.into()));
321 321 }
322 322 Ok(None)
323 323 }
324 324
325 325 /// Decompress a revlog chunk.
326 326 ///
327 327 /// The chunk is expected to begin with a header identifying the
328 328 /// format type so it can be routed to an appropriate decompressor.
329 329 pub fn decompress<'a>(
330 330 &'a self,
331 331 data: &'a [u8],
332 332 ) -> Result<Cow<[u8]>, RevlogError> {
333 333 if data.is_empty() {
334 334 return Ok(data.into());
335 335 }
336 336
337 337 // Revlogs are read much more frequently than they are written and many
338 338 // chunks only take microseconds to decompress, so performance is
339 339 // important here.
340 340
341 341 let header = data[0];
342 342 match header {
343 343 // Settings don't matter as they only affect compression
344 344 ZLIB_BYTE => Ok(ZlibCompressor::new(0).decompress(data)?.into()),
345 345 // Settings don't matter as they only affect compression
346 346 ZSTD_BYTE => {
347 347 Ok(ZstdCompressor::new(0, 0).decompress(data)?.into())
348 348 }
349 349 b'\0' => Ok(data.into()),
350 350 b'u' => Ok((&data[1..]).into()),
351 351 other => Err(HgError::UnsupportedFeature(format!(
352 352 "unknown compression header '{}'",
353 353 other
354 354 ))
355 355 .into()),
356 356 }
357 357 }
358 358
359 359 /// Obtain a segment of raw data corresponding to a range of revisions.
360 360 ///
361 361 /// Requests for data may be satisfied by a cache.
362 362 ///
363 363 /// Returns a 2-tuple of (offset, data) for the requested range of
364 364 /// revisions. Offset is the integer offset from the beginning of the
365 365 /// revlog and data is a slice of the raw byte data.
366 366 ///
367 367 /// Callers will need to call `self.start(rev)` and `self.length(rev)`
368 368 /// to determine where each revision's data begins and ends.
369 369 pub fn get_segment_for_revs(
370 370 &self,
371 371 start_rev: Revision,
372 372 end_rev: Revision,
373 373 ) -> Result<(usize, Vec<u8>), HgError> {
374 374 let start = if start_rev == NULL_REVISION {
375 375 0
376 376 } else {
377 377 let start_entry = self
378 378 .index
379 379 .get_entry(start_rev)
380 380 .expect("null revision segment");
381 381 self.index.start(start_rev, &start_entry)
382 382 };
383 383 let end_entry = self
384 384 .index
385 385 .get_entry(end_rev)
386 386 .expect("null revision segment");
387 387 let end = self.index.start(end_rev, &end_entry) + self.length(end_rev);
388 388
389 389 let length = end - start;
390 390
391 391 // XXX should we use mmap instead of doing this for platforms that
392 392 // support madvise/populate?
393 393 Ok((start, self.segment_file.read_chunk(start, length)?))
394 394 }
395 395
396 396 /// Return the uncompressed raw data for `rev`
397 397 pub fn chunk_for_rev(&self, rev: Revision) -> Result<Arc<[u8]>, HgError> {
398 398 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
399 399 if let Some(chunk) = cache.borrow_mut().get(&rev) {
400 400 return Ok(chunk.clone());
401 401 }
402 402 }
403 403 // TODO revlogv2 should check the compression mode
404 404 let data = self.get_segment_for_revs(rev, rev)?.1;
405 405 let uncompressed = self.decompress(&data).map_err(|e| {
406 406 HgError::abort(
407 407 format!("revlog decompression error: {}", e),
408 408 exit_codes::ABORT,
409 409 None,
410 410 )
411 411 })?;
412 412 let uncompressed: Arc<[u8]> = Arc::from(uncompressed.into_owned());
413 413 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
414 414 cache.borrow_mut().insert(rev, uncompressed.clone());
415 415 }
416 416 Ok(uncompressed)
417 417 }
418 418
419 419 /// Execute `func` within a read context for the data file, meaning that
420 420 /// the read handle will be taken and discarded after the operation.
421 421 pub fn with_read<R>(
422 422 &self,
423 423 func: impl FnOnce() -> Result<R, RevlogError>,
424 424 ) -> Result<R, RevlogError> {
425 425 self.enter_reading_context()?;
426 426 let res = func();
427 427 self.exit_reading_context();
428 428 res.map_err(Into::into)
429 429 }
430 430
431 431 /// `pub` only for use in hg-cpython
432 432 #[doc(hidden)]
433 433 pub fn enter_reading_context(&self) -> Result<(), HgError> {
434 434 if self.is_empty() {
435 435 // Nothing to be read
436 436 return Ok(());
437 437 }
438 438 if self.delayed_buffer.is_some() && self.is_inline() {
439 439 return Err(HgError::abort(
440 440 "revlog with delayed write should not be inline",
441 441 exit_codes::ABORT,
442 442 None,
443 443 ));
444 444 }
445 445 self.segment_file.get_read_handle()?;
446 446 Ok(())
447 447 }
448 448
449 449 /// `pub` only for use in hg-cpython
450 450 #[doc(hidden)]
451 451 pub fn exit_reading_context(&self) {
452 452 self.segment_file.exit_reading_context()
453 453 }
454 454
455 455 /// Fill the buffer returned by `get_buffer` with the possibly un-validated
456 456 /// raw text for a revision. It can be already validated if it comes
457 457 /// from the cache.
458 458 pub fn raw_text<G, T>(
459 459 &self,
460 460 rev: Revision,
461 461 get_buffer: G,
462 462 ) -> Result<(), RevlogError>
463 463 where
464 464 G: FnOnce(
465 465 usize,
466 466 &mut dyn FnMut(
467 467 &mut dyn RevisionBuffer<Target = T>,
468 468 ) -> Result<(), RevlogError>,
469 469 ) -> Result<(), RevlogError>,
470 470 {
471 471 let entry = &self.get_entry(rev)?;
472 472 let raw_size = entry.uncompressed_len();
473 473 let mut mutex_guard = self
474 474 .last_revision_cache
475 475 .lock()
476 476 .expect("lock should not be held");
477 477 let cached_rev = if let Some((_node, rev, data)) = &*mutex_guard {
478 478 Some((*rev, data.deref().as_ref()))
479 479 } else {
480 480 None
481 481 };
482 482 if let Some(cache) = &self.uncompressed_chunk_cache {
483 483 let cache = &mut cache.borrow_mut();
484 484 if let Some(size) = raw_size {
485 485 // Dynamically update the uncompressed_chunk_cache size to the
486 486 // largest revision we've seen in this revlog.
487 487 // Do it *before* restoration in case the current revision
488 488 // is the largest.
489 489 let factor = self
490 490 .data_config
491 491 .uncompressed_cache_factor
492 492 .expect("cache should not exist without factor");
493 493 let candidate_size = (size as f64 * factor) as usize;
494 494 let limiter_mut = cache.limiter_mut();
495 495 if candidate_size > limiter_mut.max_memory_usage() {
496 496 std::mem::swap(
497 497 limiter_mut,
498 498 &mut ByMemoryUsage::new(candidate_size),
499 499 );
500 500 }
501 501 }
502 502 }
503 503 entry.rawdata(cached_rev, get_buffer)?;
504 504 // drop cache to save memory, the caller is expected to update
505 505 // the revision cache after validating the text
506 506 mutex_guard.take();
507 507 Ok(())
508 508 }
509 509
510 510 /// Only `pub` for `hg-cpython`.
511 511 /// Obtain decompressed raw data for the specified revisions that are
512 512 /// assumed to be in ascending order.
513 513 ///
514 514 /// Returns a list with decompressed data for each requested revision.
515 515 #[doc(hidden)]
516 516 pub fn chunks(
517 517 &self,
518 518 revs: Vec<Revision>,
519 519 target_size: Option<u64>,
520 520 ) -> Result<Vec<Arc<[u8]>>, RevlogError> {
521 521 if revs.is_empty() {
522 522 return Ok(vec![]);
523 523 }
524 524 let mut fetched_revs = vec![];
525 525 let mut chunks = Vec::with_capacity(revs.len());
526 526
527 527 match self.uncompressed_chunk_cache.as_ref() {
528 528 Some(cache) => {
529 529 let mut cache = cache.borrow_mut();
530 530 for rev in revs.iter() {
531 531 match cache.get(rev) {
532 532 Some(hit) => chunks.push((*rev, hit.to_owned())),
533 533 None => fetched_revs.push(*rev),
534 534 }
535 535 }
536 536 }
537 537 None => fetched_revs = revs,
538 538 }
539 539
540 540 let already_cached = chunks.len();
541 541
542 542 let sliced_chunks = if fetched_revs.is_empty() {
543 543 vec![]
544 544 } else if !self.data_config.with_sparse_read || self.is_inline() {
545 545 vec![fetched_revs]
546 546 } else {
547 547 self.slice_chunk(&fetched_revs, target_size)?
548 548 };
549 549
550 550 self.with_read(|| {
551 551 for revs_chunk in sliced_chunks {
552 552 let first_rev = revs_chunk[0];
553 553 // Skip trailing revisions with empty diff
554 554 let last_rev_idx = revs_chunk
555 555 .iter()
556 556 .rposition(|r| self.length(*r) != 0)
557 557 .unwrap_or(revs_chunk.len() - 1);
558 558
559 559 let last_rev = revs_chunk[last_rev_idx];
560 560
561 561 let (offset, data) =
562 562 self.get_segment_for_revs(first_rev, last_rev)?;
563 563
564 564 let revs_chunk = &revs_chunk[..=last_rev_idx];
565 565
566 566 for rev in revs_chunk {
567 567 let chunk_start = self.start(*rev);
568 568 let chunk_length = self.length(*rev);
569 569 // TODO revlogv2 should check the compression mode
570 570 let bytes = &data[chunk_start - offset..][..chunk_length];
571 571 let chunk = if !bytes.is_empty() && bytes[0] == ZSTD_BYTE {
572 572 // If we're using `zstd`, we want to try a more
573 573 // specialized decompression
574 574 let entry = self.index.get_entry(*rev).unwrap();
575 575 let is_delta = entry
576 576 .base_revision_or_base_of_delta_chain()
577 577 != (*rev).into();
578 578 let uncompressed = uncompressed_zstd_data(
579 579 bytes,
580 580 is_delta,
581 581 entry.uncompressed_len(),
582 582 )?;
583 583 Cow::Owned(uncompressed)
584 584 } else {
585 585 // Otherwise just fallback to generic decompression.
586 586 self.decompress(bytes)?
587 587 };
588 588
589 589 chunks.push((*rev, chunk.into()));
590 590 }
591 591 }
592 592 Ok(())
593 593 })?;
594 594
595 595 if let Some(cache) = self.uncompressed_chunk_cache.as_ref() {
596 596 let mut cache = cache.borrow_mut();
597 597 for (rev, chunk) in chunks.iter().skip(already_cached) {
598 598 cache.insert(*rev, chunk.clone());
599 599 }
600 600 }
601 601 // Use stable sort here since it's *mostly* sorted
602 602 chunks.sort_by(|a, b| a.0.cmp(&b.0));
603 603 Ok(chunks.into_iter().map(|(_r, chunk)| chunk).collect())
604 604 }
605 605
606 606 /// Slice revs to reduce the amount of unrelated data to be read from disk.
607 607 ///
608 608 /// ``revs`` is sliced into groups that should be read in one time.
609 609 /// Assume that revs are sorted.
610 610 ///
611 611 /// The initial chunk is sliced until the overall density
612 612 /// (payload/chunks-span ratio) is above
613 613 /// `revlog.data_config.sr_density_threshold`.
614 614 /// No gap smaller than `revlog.data_config.sr_min_gap_size` is skipped.
615 615 ///
616 616 /// If `target_size` is set, no chunk larger than `target_size`
617 617 /// will be returned.
618 618 /// For consistency with other slicing choices, this limit won't go lower
619 619 /// than `revlog.data_config.sr_min_gap_size`.
620 620 ///
621 621 /// If individual revision chunks are larger than this limit, they will
622 622 /// still be raised individually.
623 623 pub fn slice_chunk(
624 624 &self,
625 625 revs: &[Revision],
626 626 target_size: Option<u64>,
627 627 ) -> Result<Vec<Vec<Revision>>, RevlogError> {
628 628 let target_size =
629 629 target_size.map(|size| size.max(self.data_config.sr_min_gap_size));
630 630
631 631 let target_density = self.data_config.sr_density_threshold;
632 632 let min_gap_size = self.data_config.sr_min_gap_size as usize;
633 633 let to_density = self.index.slice_chunk_to_density(
634 634 revs,
635 635 target_density,
636 636 min_gap_size,
637 637 );
638 638
639 639 let mut sliced = vec![];
640 640
641 641 for chunk in to_density {
642 642 sliced.extend(
643 643 self.slice_chunk_to_size(&chunk, target_size)?
644 644 .into_iter()
645 645 .map(ToOwned::to_owned),
646 646 );
647 647 }
648 648
649 649 Ok(sliced)
650 650 }
651 651
652 652 /// Slice revs to match the target size
653 653 ///
654 654 /// This is intended to be used on chunks that density slicing selected,
655 655 /// but that are still too large compared to the read guarantee of revlogs.
656 656 /// This might happen when the "minimal gap size" interrupted the slicing
657 657 /// or when chains are built in a way that create large blocks next to
658 658 /// each other.
659 659 fn slice_chunk_to_size<'a>(
660 660 &self,
661 661 revs: &'a [Revision],
662 662 target_size: Option<u64>,
663 663 ) -> Result<Vec<&'a [Revision]>, RevlogError> {
664 664 let mut start_data = self.start(revs[0]);
665 665 let end_data = self.end(revs[revs.len() - 1]);
666 666 let full_span = end_data - start_data;
667 667
668 668 let nothing_to_do = target_size
669 669 .map(|size| full_span <= size as usize)
670 670 .unwrap_or(true);
671 671
672 672 if nothing_to_do {
673 673 return Ok(vec![revs]);
674 674 }
675 675 let target_size = target_size.expect("target_size is set") as usize;
676 676
677 677 let mut start_rev_idx = 0;
678 678 let mut end_rev_idx = 1;
679 679 let mut chunks = vec![];
680 680
681 681 for (idx, rev) in revs.iter().enumerate().skip(1) {
682 682 let span = self.end(*rev) - start_data;
683 683 let is_snapshot = self.is_snapshot(*rev)?;
684 684 if span <= target_size && is_snapshot {
685 685 end_rev_idx = idx + 1;
686 686 } else {
687 687 let chunk =
688 688 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
689 689 if !chunk.is_empty() {
690 690 chunks.push(chunk);
691 691 }
692 692 start_rev_idx = idx;
693 693 start_data = self.start(*rev);
694 694 end_rev_idx = idx + 1;
695 695 }
696 696 if !is_snapshot {
697 697 break;
698 698 }
699 699 }
700 700
701 701 // For the others, we use binary slicing to quickly converge towards
702 702 // valid chunks (otherwise, we might end up looking for the start/end
703 703 // of many revisions). This logic is not looking for the perfect
704 704 // slicing point, it quickly converges towards valid chunks.
705 705 let number_of_items = revs.len();
706 706
707 707 while (end_data - start_data) > target_size {
708 708 end_rev_idx = number_of_items;
709 709 if number_of_items - start_rev_idx <= 1 {
710 710 // Protect against individual chunks larger than the limit
711 711 break;
712 712 }
713 713 let mut local_end_data = self.end(revs[end_rev_idx - 1]);
714 714 let mut span = local_end_data - start_data;
715 715 while span > target_size {
716 716 if end_rev_idx - start_rev_idx <= 1 {
717 717 // Protect against individual chunks larger than the limit
718 718 break;
719 719 }
720 720 end_rev_idx -= (end_rev_idx - start_rev_idx) / 2;
721 721 local_end_data = self.end(revs[end_rev_idx - 1]);
722 722 span = local_end_data - start_data;
723 723 }
724 724 let chunk =
725 725 self.trim_chunk(revs, start_rev_idx, Some(end_rev_idx));
726 726 if !chunk.is_empty() {
727 727 chunks.push(chunk);
728 728 }
729 729 start_rev_idx = end_rev_idx;
730 730 start_data = self.start(revs[start_rev_idx]);
731 731 }
732 732
733 733 let chunk = self.trim_chunk(revs, start_rev_idx, None);
734 734 if !chunk.is_empty() {
735 735 chunks.push(chunk);
736 736 }
737 737
738 738 Ok(chunks)
739 739 }
740 740
741 741 /// Returns `revs[startidx..endidx]` without empty trailing revs
742 742 fn trim_chunk<'a>(
743 743 &self,
744 744 revs: &'a [Revision],
745 745 start_rev_idx: usize,
746 746 end_rev_idx: Option<usize>,
747 747 ) -> &'a [Revision] {
748 748 let mut end_rev_idx = end_rev_idx.unwrap_or(revs.len());
749 749
750 750 // If we have a non-empty delta candidate, there is nothing to trim
751 751 if revs[end_rev_idx - 1].0 < self.len() as BaseRevision {
752 752 // Trim empty revs at the end, except the very first rev of a chain
753 753 while end_rev_idx > 1
754 754 && end_rev_idx > start_rev_idx
755 755 && self.length(revs[end_rev_idx - 1]) == 0
756 756 {
757 757 end_rev_idx -= 1
758 758 }
759 759 }
760 760
761 761 &revs[start_rev_idx..end_rev_idx]
762 762 }
763 763
764 764 /// Check the hash of some given data against the recorded hash.
765 765 pub fn check_hash(
766 766 &self,
767 767 p1: Revision,
768 768 p2: Revision,
769 769 expected: &[u8],
770 770 data: &[u8],
771 771 ) -> bool {
772 772 let e1 = self.index.get_entry(p1);
773 773 let h1 = match e1 {
774 774 Some(ref entry) => entry.hash(),
775 775 None => &NULL_NODE,
776 776 };
777 777 let e2 = self.index.get_entry(p2);
778 778 let h2 = match e2 {
779 779 Some(ref entry) => entry.hash(),
780 780 None => &NULL_NODE,
781 781 };
782 782
783 783 hash(data, h1.as_bytes(), h2.as_bytes()) == expected
784 784 }
785 785
786 786 /// Returns whether we are currently in a [`Self::with_write`] context
787 787 pub fn is_writing(&self) -> bool {
788 788 self.writing_handles.is_some()
789 789 }
790 790
791 791 /// Open the revlog files for writing
792 792 ///
793 793 /// Adding content to a revlog should be done within this context.
794 794 /// TODO try using `BufRead` and `BufWrite` and see if performance improves
795 795 pub fn with_write<R>(
796 796 &mut self,
797 797 transaction: &mut impl Transaction,
798 798 data_end: Option<usize>,
799 799 func: impl FnOnce() -> R,
800 800 ) -> Result<R, HgError> {
801 801 if self.is_writing() {
802 802 return Ok(func());
803 803 }
804 804 self.enter_writing_context(data_end, transaction)
805 .map_err(|e| {
805 .inspect_err(|_| {
806 806 self.exit_writing_context();
807 e
808 807 })?;
809 808 let res = func();
810 809 self.exit_writing_context();
811 810 Ok(res)
812 811 }
813 812
814 813 /// `pub` only for use in hg-cpython
815 814 #[doc(hidden)]
816 815 pub fn exit_writing_context(&mut self) {
817 816 self.writing_handles.take();
818 817 self.segment_file.writing_handle.take();
819 818 self.segment_file.reading_handle.take();
820 819 }
821 820
822 821 /// `pub` only for use in hg-cpython
823 822 #[doc(hidden)]
824 823 pub fn python_writing_handles(&self) -> Option<&WriteHandles> {
825 824 self.writing_handles.as_ref()
826 825 }
827 826
828 827 /// `pub` only for use in hg-cpython
829 828 #[doc(hidden)]
830 829 pub fn enter_writing_context(
831 830 &mut self,
832 831 data_end: Option<usize>,
833 832 transaction: &mut impl Transaction,
834 833 ) -> Result<(), HgError> {
835 834 let data_size = if self.is_empty() {
836 835 0
837 836 } else {
838 837 self.end(Revision((self.len() - 1) as BaseRevision))
839 838 };
840 839 let data_handle = if !self.is_inline() {
841 840 let data_handle = match self.vfs.open(&self.data_file) {
842 841 Ok(mut f) => {
843 842 if let Some(end) = data_end {
844 843 f.seek(SeekFrom::Start(end as u64))
845 844 .when_reading_file(&self.data_file)?;
846 845 } else {
847 846 f.seek(SeekFrom::End(0))
848 847 .when_reading_file(&self.data_file)?;
849 848 }
850 849 f
851 850 }
852 851 Err(e) => match e {
853 852 HgError::IoError { error, context } => {
854 853 if error.kind() != ErrorKind::NotFound {
855 854 return Err(HgError::IoError { error, context });
856 855 }
857 856 self.vfs.create(&self.data_file, true)?
858 857 }
859 858 e => return Err(e),
860 859 },
861 860 };
862 861 transaction.add(&self.data_file, data_size);
863 862 Some(FileHandle::from_file(
864 863 data_handle,
865 864 dyn_clone::clone_box(&*self.vfs),
866 865 &self.data_file,
867 866 ))
868 867 } else {
869 868 None
870 869 };
871 870 let index_size = self.len() * INDEX_ENTRY_SIZE;
872 871 let index_handle = self.index_write_handle()?;
873 872 if self.is_inline() {
874 873 transaction.add(&self.index_file, data_size);
875 874 } else {
876 875 transaction.add(&self.index_file, index_size);
877 876 }
878 877 self.writing_handles = Some(WriteHandles {
879 878 index_handle: index_handle.clone(),
880 879 data_handle: data_handle.clone(),
881 880 });
882 881 *self.segment_file.reading_handle.borrow_mut() = if self.is_inline() {
883 882 Some(index_handle)
884 883 } else {
885 884 data_handle
886 885 };
887 886 Ok(())
888 887 }
889 888
890 889 /// Get a write handle to the index, sought to the end of its data.
891 890 fn index_write_handle(&self) -> Result<FileHandle, HgError> {
892 891 let res = if self.delayed_buffer.is_none() {
893 892 if self.data_config.check_ambig {
894 893 self.vfs.open_check_ambig(&self.index_file)
895 894 } else {
896 895 self.vfs.open(&self.index_file)
897 896 }
898 897 } else {
899 898 self.vfs.open(&self.index_file)
900 899 };
901 900 match res {
902 901 Ok(mut handle) => {
903 902 handle
904 903 .seek(SeekFrom::End(0))
905 904 .when_reading_file(&self.index_file)?;
906 905 Ok(
907 906 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
908 907 {
909 908 FileHandle::from_file_delayed(
910 909 handle,
911 910 dyn_clone::clone_box(&*self.vfs),
912 911 &self.index_file,
913 912 delayed_buffer.clone(),
914 913 )?
915 914 } else {
916 915 FileHandle::from_file(
917 916 handle,
918 917 dyn_clone::clone_box(&*self.vfs),
919 918 &self.index_file,
920 919 )
921 920 },
922 921 )
923 922 }
924 923 Err(e) => match e {
925 924 HgError::IoError { error, context } => {
926 925 if error.kind() != ErrorKind::NotFound {
927 926 return Err(HgError::IoError { error, context });
928 927 };
929 928 if let Some(delayed_buffer) = self.delayed_buffer.as_ref()
930 929 {
931 930 FileHandle::new_delayed(
932 931 dyn_clone::clone_box(&*self.vfs),
933 932 &self.index_file,
934 933 true,
935 934 delayed_buffer.clone(),
936 935 )
937 936 } else {
938 937 FileHandle::new(
939 938 dyn_clone::clone_box(&*self.vfs),
940 939 &self.index_file,
941 940 true,
942 941 true,
943 942 )
944 943 }
945 944 }
946 945 e => Err(e),
947 946 },
948 947 }
949 948 }
950 949
951 950 /// Split the data of an inline revlog into an index and a data file
952 951 pub fn split_inline(
953 952 &mut self,
954 953 header: IndexHeader,
955 954 new_index_file_path: Option<PathBuf>,
956 955 ) -> Result<PathBuf, RevlogError> {
957 956 assert!(self.delayed_buffer.is_none());
958 957 let existing_handles = self.writing_handles.is_some();
959 958 if let Some(handles) = &mut self.writing_handles {
960 959 handles.index_handle.flush()?;
961 960 self.writing_handles.take();
962 961 self.segment_file.writing_handle.take();
963 962 }
964 963 let mut new_data_file_handle =
965 964 self.vfs.create(&self.data_file, true)?;
966 965 // Drop any potential data, possibly redundant with the VFS impl.
967 966 new_data_file_handle
968 967 .set_len(0)
969 968 .when_writing_file(&self.data_file)?;
970 969
971 970 self.with_read(|| -> Result<(), RevlogError> {
972 971 for r in 0..self.index.len() {
973 972 let rev = Revision(r as BaseRevision);
974 973 let rev_segment = self.get_segment_for_revs(rev, rev)?.1;
975 974 new_data_file_handle
976 975 .write_all(&rev_segment)
977 976 .when_writing_file(&self.data_file)?;
978 977 }
979 978 new_data_file_handle
980 979 .flush()
981 980 .when_writing_file(&self.data_file)?;
982 981 Ok(())
983 982 })?;
984 983
985 984 if let Some(index_path) = new_index_file_path {
986 985 self.index_file = index_path
987 986 }
988 987
989 988 let mut new_index_handle = self.vfs.create(&self.index_file, true)?;
990 989 let mut new_data = Vec::with_capacity(self.len() * INDEX_ENTRY_SIZE);
991 990 for r in 0..self.len() {
992 991 let rev = Revision(r as BaseRevision);
993 992 let entry = self.index.entry_binary(rev).unwrap_or_else(|| {
994 993 panic!(
995 994 "entry {} should exist in {}",
996 995 r,
997 996 self.index_file.display()
998 997 )
999 998 });
1000 999 if r == 0 {
1001 1000 new_data.extend(header.header_bytes);
1002 1001 }
1003 1002 new_data.extend(entry);
1004 1003 }
1005 1004 new_index_handle
1006 1005 .write_all(&new_data)
1007 1006 .when_writing_file(&self.index_file)?;
1008 1007 // Replace the index with a new one because the buffer contains inline
1009 1008 // data
1010 1009 self.index = Index::new(Box::new(new_data), header)?;
1011 1010 self.inline = false;
1012 1011
1013 1012 self.segment_file = RandomAccessFile::new(
1014 1013 dyn_clone::clone_box(&*self.vfs),
1015 1014 self.data_file.to_owned(),
1016 1015 );
1017 1016 if existing_handles {
1018 1017 // Switched from inline to conventional, reopen the index
1019 1018 let new_data_handle = Some(FileHandle::from_file(
1020 1019 new_data_file_handle,
1021 1020 dyn_clone::clone_box(&*self.vfs),
1022 1021 &self.data_file,
1023 1022 ));
1024 1023 self.writing_handles = Some(WriteHandles {
1025 1024 index_handle: self.index_write_handle()?,
1026 1025 data_handle: new_data_handle.clone(),
1027 1026 });
1028 1027 *self.segment_file.writing_handle.borrow_mut() = new_data_handle;
1029 1028 }
1030 1029
1031 1030 Ok(self.index_file.to_owned())
1032 1031 }
1033 1032
1034 1033 /// Write a new entry to this revlog.
1035 1034 /// - `entry` is the index bytes
1036 1035 /// - `header_and_data` is the compression header and the revision data
1037 1036 /// - `offset` is the position in the data file to write to
1038 1037 /// - `index_end` is the overwritten position in the index in revlog-v2,
1039 1038 /// since the format may allow a rewrite of garbage data at the end.
1040 1039 /// - `data_end` is the overwritten position in the data-file in revlog-v2,
1041 1040 /// since the format may allow a rewrite of garbage data at the end.
1042 1041 ///
1043 1042 /// XXX Why do we have `data_end` *and* `offset`? Same question in Python
1044 1043 pub fn write_entry(
1045 1044 &mut self,
1046 1045 mut transaction: impl Transaction,
1047 1046 entry: &[u8],
1048 1047 header_and_data: (&[u8], &[u8]),
1049 1048 mut offset: usize,
1050 1049 index_end: Option<u64>,
1051 1050 data_end: Option<u64>,
1052 1051 ) -> Result<(u64, Option<u64>), HgError> {
1053 1052 let current_revision = self.len() - 1;
1054 1053 let canonical_index_file = self.canonical_index_file();
1055 1054
1056 1055 let is_inline = self.is_inline();
1057 1056 let handles = match &mut self.writing_handles {
1058 1057 None => {
1059 1058 return Err(HgError::abort(
1060 1059 "adding revision outside of the `with_write` context",
1061 1060 exit_codes::ABORT,
1062 1061 None,
1063 1062 ));
1064 1063 }
1065 1064 Some(handles) => handles,
1066 1065 };
1067 1066 let index_handle = &mut handles.index_handle;
1068 1067 let data_handle = &mut handles.data_handle;
1069 1068 if let Some(end) = index_end {
1070 1069 index_handle
1071 1070 .seek(SeekFrom::Start(end))
1072 1071 .when_reading_file(&self.index_file)?;
1073 1072 } else {
1074 1073 index_handle
1075 1074 .seek(SeekFrom::End(0))
1076 1075 .when_reading_file(&self.index_file)?;
1077 1076 }
1078 1077 if let Some(data_handle) = data_handle {
1079 1078 if let Some(end) = data_end {
1080 1079 data_handle
1081 1080 .seek(SeekFrom::Start(end))
1082 1081 .when_reading_file(&self.data_file)?;
1083 1082 } else {
1084 1083 data_handle
1085 1084 .seek(SeekFrom::End(0))
1086 1085 .when_reading_file(&self.data_file)?;
1087 1086 }
1088 1087 }
1089 1088 let (header, data) = header_and_data;
1090 1089
1091 1090 if !is_inline {
1092 1091 transaction.add(&self.data_file, offset);
1093 1092 transaction
1094 1093 .add(&canonical_index_file, current_revision * entry.len());
1095 1094 let data_handle = data_handle
1096 1095 .as_mut()
1097 1096 .expect("data handle should exist when not inline");
1098 1097 if !header.is_empty() {
1099 1098 data_handle.write_all(header)?;
1100 1099 }
1101 1100 data_handle.write_all(data)?;
1102 1101 match &mut self.delayed_buffer {
1103 1102 Some(buf) => {
1104 1103 buf.lock()
1105 1104 .expect("propagate the panic")
1106 1105 .buffer
1107 1106 .write_all(entry)
1108 1107 .expect("write to delay buffer should succeed");
1109 1108 }
1110 1109 None => index_handle.write_all(entry)?,
1111 1110 }
1112 1111 } else if self.delayed_buffer.is_some() {
1113 1112 return Err(HgError::abort(
1114 1113 "invalid delayed write on inline revlog",
1115 1114 exit_codes::ABORT,
1116 1115 None,
1117 1116 ));
1118 1117 } else {
1119 1118 offset += current_revision * entry.len();
1120 1119 transaction.add(&canonical_index_file, offset);
1121 1120 index_handle.write_all(entry)?;
1122 1121 index_handle.write_all(header)?;
1123 1122 index_handle.write_all(data)?;
1124 1123 }
1125 1124 let data_position = match data_handle {
1126 1125 Some(h) => Some(h.position()?),
1127 1126 None => None,
1128 1127 };
1129 1128 Ok((index_handle.position()?, data_position))
1130 1129 }
1131 1130
1132 1131 /// Return the real target index file and not the temporary when diverting
1133 1132 pub fn canonical_index_file(&self) -> PathBuf {
1134 1133 self.original_index_file
1135 1134 .as_ref()
1136 1135 .map(ToOwned::to_owned)
1137 1136 .unwrap_or_else(|| self.index_file.to_owned())
1138 1137 }
1139 1138
1140 1139 /// Return the path to the diverted index
1141 1140 fn diverted_index(&self) -> PathBuf {
1142 1141 self.index_file.with_extension("i.a")
1143 1142 }
1144 1143
1145 1144 /// True if we're in a [`Self::with_write`] or [`Self::with_read`] context
1146 1145 pub fn is_open(&self) -> bool {
1147 1146 self.segment_file.is_open()
1148 1147 }
1149 1148
1150 1149 /// Set this revlog to delay its writes to a buffer
1151 1150 pub fn delay(&mut self) -> Result<Option<PathBuf>, HgError> {
1152 1151 assert!(!self.is_open());
1153 1152 if self.is_inline() {
1154 1153 return Err(HgError::abort(
1155 1154 "revlog with delayed write should not be inline",
1156 1155 exit_codes::ABORT,
1157 1156 None,
1158 1157 ));
1159 1158 }
1160 1159 if self.delayed_buffer.is_some() || self.original_index_file.is_some()
1161 1160 {
1162 1161 // Delay or divert already happening
1163 1162 return Ok(None);
1164 1163 }
1165 1164 if self.is_empty() {
1166 1165 self.original_index_file = Some(self.index_file.to_owned());
1167 1166 self.index_file = self.diverted_index();
1168 1167 if self.vfs.exists(&self.index_file) {
1169 1168 self.vfs.unlink(&self.index_file)?;
1170 1169 }
1171 1170 Ok(Some(self.index_file.to_owned()))
1172 1171 } else {
1173 1172 self.delayed_buffer =
1174 1173 Some(Arc::new(Mutex::new(DelayedBuffer::default())));
1175 1174 Ok(None)
1176 1175 }
1177 1176 }
1178 1177
1179 1178 /// Write the pending data (in memory) if any to the diverted index file
1180 1179 /// (on disk temporary file)
1181 1180 pub fn write_pending(
1182 1181 &mut self,
1183 1182 ) -> Result<(Option<PathBuf>, bool), HgError> {
1184 1183 assert!(!self.is_open());
1185 1184 if self.is_inline() {
1186 1185 return Err(HgError::abort(
1187 1186 "revlog with delayed write should not be inline",
1188 1187 exit_codes::ABORT,
1189 1188 None,
1190 1189 ));
1191 1190 }
1192 1191 if self.original_index_file.is_some() {
1193 1192 return Ok((None, true));
1194 1193 }
1195 1194 let mut any_pending = false;
1196 1195 let pending_index_file = self.diverted_index();
1197 1196 if self.vfs.exists(&pending_index_file) {
1198 1197 self.vfs.unlink(&pending_index_file)?;
1199 1198 }
1200 1199 self.vfs.copy(&self.index_file, &pending_index_file)?;
1201 1200 if let Some(delayed_buffer) = self.delayed_buffer.take() {
1202 1201 let mut index_file_handle = self.vfs.open(&pending_index_file)?;
1203 1202 index_file_handle
1204 1203 .seek(SeekFrom::End(0))
1205 1204 .when_writing_file(&pending_index_file)?;
1206 1205 let delayed_data =
1207 1206 &delayed_buffer.lock().expect("propagate the panic").buffer;
1208 1207 index_file_handle
1209 1208 .write_all(delayed_data)
1210 1209 .when_writing_file(&pending_index_file)?;
1211 1210 any_pending = true;
1212 1211 }
1213 1212 self.original_index_file = Some(self.index_file.to_owned());
1214 1213 self.index_file = pending_index_file;
1215 1214 Ok((Some(self.index_file.to_owned()), any_pending))
1216 1215 }
1217 1216
1218 1217 /// Overwrite the canonical file with the diverted file, or write out the
1219 1218 /// delayed buffer.
1220 1219 /// Returns an error if the revlog is neither diverted nor delayed.
1221 1220 pub fn finalize_pending(&mut self) -> Result<PathBuf, HgError> {
1222 1221 assert!(!self.is_open());
1223 1222 if self.is_inline() {
1224 1223 return Err(HgError::abort(
1225 1224 "revlog with delayed write should not be inline",
1226 1225 exit_codes::ABORT,
1227 1226 None,
1228 1227 ));
1229 1228 }
1230 1229 match (
1231 1230 self.delayed_buffer.as_ref(),
1232 1231 self.original_index_file.as_ref(),
1233 1232 ) {
1234 1233 (None, None) => {
1235 1234 return Err(HgError::abort(
1236 1235 "neither delay nor divert found on this revlog",
1237 1236 exit_codes::ABORT,
1238 1237 None,
1239 1238 ));
1240 1239 }
1241 1240 (Some(delay), None) => {
1242 1241 let mut index_file_handle = self.vfs.open(&self.index_file)?;
1243 1242 index_file_handle
1244 1243 .seek(SeekFrom::End(0))
1245 1244 .when_writing_file(&self.index_file)?;
1246 1245 index_file_handle
1247 1246 .write_all(
1248 1247 &delay.lock().expect("propagate the panic").buffer,
1249 1248 )
1250 1249 .when_writing_file(&self.index_file)?;
1251 1250 self.delayed_buffer = None;
1252 1251 }
1253 1252 (None, Some(divert)) => {
1254 1253 if self.vfs.exists(&self.index_file) {
1255 1254 self.vfs.rename(&self.index_file, divert, true)?;
1256 1255 }
1257 1256 divert.clone_into(&mut self.index_file);
1258 1257 self.original_index_file = None;
1259 1258 }
1260 1259 (Some(_), Some(_)) => unreachable!(
1261 1260 "{} is in an inconsistent state of both delay and divert",
1262 1261 self.canonical_index_file().display(),
1263 1262 ),
1264 1263 }
1265 1264 Ok(self.canonical_index_file())
1266 1265 }
1267 1266
1268 1267 /// `pub` only for `hg-cpython`. This is made a different method than
1269 1268 /// [`Revlog::index`] in case there is a different invariant that pops up
1270 1269 /// later.
1271 1270 #[doc(hidden)]
1272 1271 pub fn shared_index(&self) -> &Index {
1273 1272 &self.index
1274 1273 }
1275 1274 }
1276 1275
1277 1276 /// The use of a [`Refcell`] assumes that a given revlog will only
1278 1277 /// be accessed (read or write) by a single thread.
1279 1278 type UncompressedChunkCache =
1280 1279 RefCell<LruMap<Revision, Arc<[u8]>, ByMemoryUsage>>;
1281 1280
1282 1281 /// The node, revision and data for the last revision we've seen. Speeds up
1283 1282 /// a lot of sequential operations of the revlog.
1284 1283 ///
1285 1284 /// The data is not just bytes since it can come from Python and we want to
1286 1285 /// avoid copies if possible.
1287 1286 type SingleRevisionCache =
1288 1287 (Node, Revision, Box<dyn Deref<Target = [u8]> + Send>);
1289 1288
1290 1289 /// A way of progressively filling a buffer with revision data, then return
1291 1290 /// that buffer. Used to abstract away Python-allocated code to reduce copying
1292 1291 /// for performance reasons.
1293 1292 pub trait RevisionBuffer {
1294 1293 /// The owned buffer type to return
1295 1294 type Target;
1296 1295 /// Copies the slice into the buffer
1297 1296 fn extend_from_slice(&mut self, slice: &[u8]);
1298 1297 /// Returns the now finished owned buffer
1299 1298 fn finish(self) -> Self::Target;
1300 1299 }
1301 1300
1302 1301 /// A simple vec-based buffer. This is uselessly complicated for the pure Rust
1303 1302 /// case, but it's the price to pay for Python compatibility.
1304 1303 #[derive(Debug)]
1305 1304 pub(super) struct CoreRevisionBuffer {
1306 1305 buf: Vec<u8>,
1307 1306 }
1308 1307
1309 1308 impl CoreRevisionBuffer {
1310 1309 pub fn new() -> Self {
1311 1310 Self { buf: vec![] }
1312 1311 }
1313 1312
1314 1313 #[inline]
1315 1314 pub fn resize(&mut self, size: usize) {
1316 1315 self.buf.reserve_exact(size - self.buf.capacity());
1317 1316 }
1318 1317 }
1319 1318
1320 1319 impl RevisionBuffer for CoreRevisionBuffer {
1321 1320 type Target = Vec<u8>;
1322 1321
1323 1322 #[inline]
1324 1323 fn extend_from_slice(&mut self, slice: &[u8]) {
1325 1324 self.buf.extend_from_slice(slice);
1326 1325 }
1327 1326
1328 1327 #[inline]
1329 1328 fn finish(self) -> Self::Target {
1330 1329 self.buf
1331 1330 }
1332 1331 }
1333 1332
1334 1333 /// Calculate the hash of a revision given its data and its parents.
1335 1334 pub fn hash(
1336 1335 data: &[u8],
1337 1336 p1_hash: &[u8],
1338 1337 p2_hash: &[u8],
1339 1338 ) -> [u8; NODE_BYTES_LENGTH] {
1340 1339 let mut hasher = Sha1::new();
1341 1340 let (a, b) = (p1_hash, p2_hash);
1342 1341 if a > b {
1343 1342 hasher.update(b);
1344 1343 hasher.update(a);
1345 1344 } else {
1346 1345 hasher.update(a);
1347 1346 hasher.update(b);
1348 1347 }
1349 1348 hasher.update(data);
1350 1349 *hasher.finalize().as_ref()
1351 1350 }
General Comments 0
You need to be logged in to leave comments. Login now