##// END OF EJS Templates
rust: do a clippy pass...
Raphaël Gomès -
r45500:26114bd6 default
parent child Browse files
Show More
@@ -1,787 +1,787 b''
1 1 // ancestors.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust versions of generic DAG ancestors algorithms for Mercurial
9 9
10 10 use super::{Graph, GraphError, Revision, NULL_REVISION};
11 11 use crate::dagops;
12 12 use std::cmp::max;
13 13 use std::collections::{BinaryHeap, HashSet};
14 14
15 15 /// Iterator over the ancestors of a given list of revisions
16 16 /// This is a generic type, defined and implemented for any Graph, so that
17 17 /// it's easy to
18 18 ///
19 19 /// - unit test in pure Rust
20 20 /// - bind to main Mercurial code, potentially in several ways and have these
21 21 /// bindings evolve over time
22 22 pub struct AncestorsIterator<G: Graph> {
23 23 graph: G,
24 24 visit: BinaryHeap<Revision>,
25 25 seen: HashSet<Revision>,
26 26 stoprev: Revision,
27 27 }
28 28
29 29 /// Lazy ancestors set, backed by AncestorsIterator
30 30 pub struct LazyAncestors<G: Graph + Clone> {
31 31 graph: G,
32 32 containsiter: AncestorsIterator<G>,
33 33 initrevs: Vec<Revision>,
34 34 stoprev: Revision,
35 35 inclusive: bool,
36 36 }
37 37
38 38 pub struct MissingAncestors<G: Graph> {
39 39 graph: G,
40 40 bases: HashSet<Revision>,
41 41 max_base: Revision,
42 42 }
43 43
44 44 impl<G: Graph> AncestorsIterator<G> {
45 45 /// Constructor.
46 46 ///
47 47 /// if `inclusive` is true, then the init revisions are emitted in
48 48 /// particular, otherwise iteration starts from their parents.
49 49 pub fn new(
50 50 graph: G,
51 51 initrevs: impl IntoIterator<Item = Revision>,
52 52 stoprev: Revision,
53 53 inclusive: bool,
54 54 ) -> Result<Self, GraphError> {
55 55 let filtered_initrevs = initrevs.into_iter().filter(|&r| r >= stoprev);
56 56 if inclusive {
57 57 let visit: BinaryHeap<Revision> = filtered_initrevs.collect();
58 let seen = visit.iter().map(|&x| x).collect();
58 let seen = visit.iter().cloned().collect();
59 59 return Ok(AncestorsIterator {
60 visit: visit,
61 seen: seen,
62 stoprev: stoprev,
63 graph: graph,
60 visit,
61 seen,
62 stoprev,
63 graph,
64 64 });
65 65 }
66 66 let mut this = AncestorsIterator {
67 67 visit: BinaryHeap::new(),
68 68 seen: HashSet::new(),
69 stoprev: stoprev,
70 graph: graph,
69 stoprev,
70 graph,
71 71 };
72 72 this.seen.insert(NULL_REVISION);
73 73 for rev in filtered_initrevs {
74 74 for parent in this.graph.parents(rev)?.iter().cloned() {
75 75 this.conditionally_push_rev(parent);
76 76 }
77 77 }
78 78 Ok(this)
79 79 }
80 80
81 81 #[inline]
82 82 fn conditionally_push_rev(&mut self, rev: Revision) {
83 83 if self.stoprev <= rev && self.seen.insert(rev) {
84 84 self.visit.push(rev);
85 85 }
86 86 }
87 87
88 88 /// Consumes partially the iterator to tell if the given target
89 89 /// revision
90 90 /// is in the ancestors it emits.
91 91 /// This is meant for iterators actually dedicated to that kind of
92 92 /// purpose
93 93 pub fn contains(&mut self, target: Revision) -> Result<bool, GraphError> {
94 94 if self.seen.contains(&target) && target != NULL_REVISION {
95 95 return Ok(true);
96 96 }
97 97 for item in self {
98 98 let rev = item?;
99 99 if rev == target {
100 100 return Ok(true);
101 101 }
102 102 if rev < target {
103 103 return Ok(false);
104 104 }
105 105 }
106 106 Ok(false)
107 107 }
108 108
109 109 pub fn peek(&self) -> Option<Revision> {
110 self.visit.peek().map(|&r| r)
110 self.visit.peek().cloned()
111 111 }
112 112
113 113 /// Tell if the iterator is about an empty set
114 114 ///
115 115 /// The result does not depend whether the iterator has been consumed
116 116 /// or not.
117 117 /// This is mostly meant for iterators backing a lazy ancestors set
118 118 pub fn is_empty(&self) -> bool {
119 119 if self.visit.len() > 0 {
120 120 return false;
121 121 }
122 122 if self.seen.len() > 1 {
123 123 return false;
124 124 }
125 125 // at this point, the seen set is at most a singleton.
126 126 // If not `self.inclusive`, it's still possible that it has only
127 127 // the null revision
128 128 self.seen.is_empty() || self.seen.contains(&NULL_REVISION)
129 129 }
130 130 }
131 131
132 132 /// Main implementation for the iterator
133 133 ///
134 134 /// The algorithm is the same as in `_lazyancestorsiter()` from `ancestors.py`
135 135 /// with a few non crucial differences:
136 136 ///
137 137 /// - there's no filtering of invalid parent revisions. Actually, it should be
138 138 /// consistent and more efficient to filter them from the end caller.
139 139 /// - we don't have the optimization for adjacent revisions (i.e., the case
140 140 /// where `p1 == rev - 1`), because it amounts to update the first element of
141 141 /// the heap without sifting, which Rust's BinaryHeap doesn't let us do.
142 142 /// - we save a few pushes by comparing with `stoprev` before pushing
143 143 impl<G: Graph> Iterator for AncestorsIterator<G> {
144 144 type Item = Result<Revision, GraphError>;
145 145
146 146 fn next(&mut self) -> Option<Self::Item> {
147 147 let current = match self.visit.peek() {
148 148 None => {
149 149 return None;
150 150 }
151 151 Some(c) => *c,
152 152 };
153 153 let [p1, p2] = match self.graph.parents(current) {
154 154 Ok(ps) => ps,
155 155 Err(e) => return Some(Err(e)),
156 156 };
157 157 if p1 < self.stoprev || !self.seen.insert(p1) {
158 158 self.visit.pop();
159 159 } else {
160 160 *(self.visit.peek_mut().unwrap()) = p1;
161 161 };
162 162
163 163 self.conditionally_push_rev(p2);
164 164 Some(Ok(current))
165 165 }
166 166 }
167 167
168 168 impl<G: Graph + Clone> LazyAncestors<G> {
169 169 pub fn new(
170 170 graph: G,
171 171 initrevs: impl IntoIterator<Item = Revision>,
172 172 stoprev: Revision,
173 173 inclusive: bool,
174 174 ) -> Result<Self, GraphError> {
175 175 let v: Vec<Revision> = initrevs.into_iter().collect();
176 176 Ok(LazyAncestors {
177 177 graph: graph.clone(),
178 178 containsiter: AncestorsIterator::new(
179 179 graph,
180 180 v.iter().cloned(),
181 181 stoprev,
182 182 inclusive,
183 183 )?,
184 184 initrevs: v,
185 stoprev: stoprev,
186 inclusive: inclusive,
185 stoprev,
186 inclusive,
187 187 })
188 188 }
189 189
190 190 pub fn contains(&mut self, rev: Revision) -> Result<bool, GraphError> {
191 191 self.containsiter.contains(rev)
192 192 }
193 193
194 194 pub fn is_empty(&self) -> bool {
195 195 self.containsiter.is_empty()
196 196 }
197 197
198 198 pub fn iter(&self) -> AncestorsIterator<G> {
199 199 // the arguments being the same as for self.containsiter, we know
200 200 // for sure that AncestorsIterator constructor can't fail
201 201 AncestorsIterator::new(
202 202 self.graph.clone(),
203 203 self.initrevs.iter().cloned(),
204 204 self.stoprev,
205 205 self.inclusive,
206 206 )
207 207 .unwrap()
208 208 }
209 209 }
210 210
211 211 impl<G: Graph> MissingAncestors<G> {
212 212 pub fn new(graph: G, bases: impl IntoIterator<Item = Revision>) -> Self {
213 213 let mut created = MissingAncestors {
214 graph: graph,
214 graph,
215 215 bases: HashSet::new(),
216 216 max_base: NULL_REVISION,
217 217 };
218 218 created.add_bases(bases);
219 219 created
220 220 }
221 221
222 222 pub fn has_bases(&self) -> bool {
223 223 !self.bases.is_empty()
224 224 }
225 225
226 226 /// Return a reference to current bases.
227 227 ///
228 228 /// This is useful in unit tests, but also setdiscovery.py does
229 229 /// read the bases attribute of a ancestor.missingancestors instance.
230 230 pub fn get_bases<'a>(&'a self) -> &'a HashSet<Revision> {
231 231 &self.bases
232 232 }
233 233
234 234 /// Computes the relative heads of current bases.
235 235 ///
236 236 /// The object is still usable after this.
237 237 pub fn bases_heads(&self) -> Result<HashSet<Revision>, GraphError> {
238 238 dagops::heads(&self.graph, self.bases.iter())
239 239 }
240 240
241 241 /// Consumes the object and returns the relative heads of its bases.
242 242 pub fn into_bases_heads(
243 243 mut self,
244 244 ) -> Result<HashSet<Revision>, GraphError> {
245 245 dagops::retain_heads(&self.graph, &mut self.bases)?;
246 246 Ok(self.bases)
247 247 }
248 248
249 249 /// Add some revisions to `self.bases`
250 250 ///
251 251 /// Takes care of keeping `self.max_base` up to date.
252 252 pub fn add_bases(
253 253 &mut self,
254 254 new_bases: impl IntoIterator<Item = Revision>,
255 255 ) {
256 256 let mut max_base = self.max_base;
257 257 self.bases.extend(
258 258 new_bases
259 259 .into_iter()
260 260 .filter(|&rev| rev != NULL_REVISION)
261 261 .map(|r| {
262 262 if r > max_base {
263 263 max_base = r;
264 264 }
265 265 r
266 266 }),
267 267 );
268 268 self.max_base = max_base;
269 269 }
270 270
271 271 /// Remove all ancestors of self.bases from the revs set (in place)
272 272 pub fn remove_ancestors_from(
273 273 &mut self,
274 274 revs: &mut HashSet<Revision>,
275 275 ) -> Result<(), GraphError> {
276 276 revs.retain(|r| !self.bases.contains(r));
277 277 // the null revision is always an ancestor. Logically speaking
278 278 // it's debatable in case bases is empty, but the Python
279 279 // implementation always adds NULL_REVISION to bases, making it
280 280 // unconditionnally true.
281 281 revs.remove(&NULL_REVISION);
282 282 if revs.is_empty() {
283 283 return Ok(());
284 284 }
285 285 // anything in revs > start is definitely not an ancestor of bases
286 286 // revs <= start need to be investigated
287 287 if self.max_base == NULL_REVISION {
288 288 return Ok(());
289 289 }
290 290
291 291 // whatever happens, we'll keep at least keepcount of them
292 292 // knowing this gives us a earlier stop condition than
293 293 // going all the way to the root
294 294 let keepcount = revs.iter().filter(|r| **r > self.max_base).count();
295 295
296 296 let mut curr = self.max_base;
297 297 while curr != NULL_REVISION && revs.len() > keepcount {
298 298 if self.bases.contains(&curr) {
299 299 revs.remove(&curr);
300 300 self.add_parents(curr)?;
301 301 }
302 302 curr -= 1;
303 303 }
304 304 Ok(())
305 305 }
306 306
307 307 /// Add the parents of `rev` to `self.bases`
308 308 ///
309 309 /// This has no effect on `self.max_base`
310 310 #[inline]
311 311 fn add_parents(&mut self, rev: Revision) -> Result<(), GraphError> {
312 312 if rev == NULL_REVISION {
313 313 return Ok(());
314 314 }
315 315 for p in self.graph.parents(rev)?.iter().cloned() {
316 316 // No need to bother the set with inserting NULL_REVISION over and
317 317 // over
318 318 if p != NULL_REVISION {
319 319 self.bases.insert(p);
320 320 }
321 321 }
322 322 Ok(())
323 323 }
324 324
325 325 /// Return all the ancestors of revs that are not ancestors of self.bases
326 326 ///
327 327 /// This may include elements from revs.
328 328 ///
329 329 /// Equivalent to the revset (::revs - ::self.bases). Revs are returned in
330 330 /// revision number order, which is a topological order.
331 331 pub fn missing_ancestors(
332 332 &mut self,
333 333 revs: impl IntoIterator<Item = Revision>,
334 334 ) -> Result<Vec<Revision>, GraphError> {
335 335 // just for convenience and comparison with Python version
336 336 let bases_visit = &mut self.bases;
337 337 let mut revs: HashSet<Revision> = revs
338 338 .into_iter()
339 339 .filter(|r| !bases_visit.contains(r))
340 340 .collect();
341 341 let revs_visit = &mut revs;
342 342 let mut both_visit: HashSet<Revision> =
343 343 revs_visit.intersection(&bases_visit).cloned().collect();
344 344 if revs_visit.is_empty() {
345 345 return Ok(Vec::new());
346 346 }
347 347 let max_revs = revs_visit.iter().cloned().max().unwrap();
348 348 let start = max(self.max_base, max_revs);
349 349
350 350 // TODO heuristics for with_capacity()?
351 351 let mut missing: Vec<Revision> = Vec::new();
352 352 for curr in (0..=start).rev() {
353 353 if revs_visit.is_empty() {
354 354 break;
355 355 }
356 356 if both_visit.remove(&curr) {
357 357 // curr's parents might have made it into revs_visit through
358 358 // another path
359 359 for p in self.graph.parents(curr)?.iter().cloned() {
360 360 if p == NULL_REVISION {
361 361 continue;
362 362 }
363 363 revs_visit.remove(&p);
364 364 bases_visit.insert(p);
365 365 both_visit.insert(p);
366 366 }
367 367 } else if revs_visit.remove(&curr) {
368 368 missing.push(curr);
369 369 for p in self.graph.parents(curr)?.iter().cloned() {
370 370 if p == NULL_REVISION {
371 371 continue;
372 372 }
373 373 if bases_visit.contains(&p) {
374 374 // p is already known to be an ancestor of revs_visit
375 375 revs_visit.remove(&p);
376 376 both_visit.insert(p);
377 377 } else if both_visit.contains(&p) {
378 378 // p should have been in bases_visit
379 379 revs_visit.remove(&p);
380 380 bases_visit.insert(p);
381 381 } else {
382 382 // visit later
383 383 revs_visit.insert(p);
384 384 }
385 385 }
386 386 } else if bases_visit.contains(&curr) {
387 387 for p in self.graph.parents(curr)?.iter().cloned() {
388 388 if p == NULL_REVISION {
389 389 continue;
390 390 }
391 391 if revs_visit.remove(&p) || both_visit.contains(&p) {
392 392 // p is an ancestor of bases_visit, and is implicitly
393 393 // in revs_visit, which means p is ::revs & ::bases.
394 394 bases_visit.insert(p);
395 395 both_visit.insert(p);
396 396 } else {
397 397 bases_visit.insert(p);
398 398 }
399 399 }
400 400 }
401 401 }
402 402 missing.reverse();
403 403 Ok(missing)
404 404 }
405 405 }
406 406
#[cfg(test)]
mod tests {

    use super::*;
    use crate::testing::{SampleGraph, VecGraph};
    use std::iter::FromIterator;

    /// Helper: run a full iteration and collect emitted revisions in order.
    fn list_ancestors<G: Graph>(
        graph: G,
        initrevs: Vec<Revision>,
        stoprev: Revision,
        inclusive: bool,
    ) -> Vec<Revision> {
        AncestorsIterator::new(graph, initrevs, stoprev, inclusive)
            .unwrap()
            .map(|res| res.unwrap())
            .collect()
    }

    #[test]
    /// Same tests as test-ancestor.py, without membership
    /// (see also test-ancestor.py.out)
    fn test_list_ancestor() {
        assert_eq!(list_ancestors(SampleGraph, vec![], 0, false), vec![]);
        assert_eq!(
            list_ancestors(SampleGraph, vec![11, 13], 0, false),
            vec![8, 7, 4, 3, 2, 1, 0]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![1, 3], 0, false),
            vec![1, 0]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![11, 13], 0, true),
            vec![13, 11, 8, 7, 4, 3, 2, 1, 0]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![11, 13], 6, false),
            vec![8, 7]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![11, 13], 6, true),
            vec![13, 11, 8, 7]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![11, 13], 11, true),
            vec![13, 11]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![11, 13], 12, true),
            vec![13]
        );
        assert_eq!(
            list_ancestors(SampleGraph, vec![10, 1], 0, true),
            vec![10, 5, 4, 2, 1, 0]
        );
    }

    #[test]
    /// Corner case that's not directly in test-ancestors.py, but
    /// that happens quite often, as demonstrated by running the whole
    /// suite.
    /// For instance, run tests/test-obsolete-checkheads.t
    fn test_nullrev_input() {
        let mut iter =
            AncestorsIterator::new(SampleGraph, vec![-1], 0, false).unwrap();
        assert_eq!(iter.next(), None)
    }

    #[test]
    fn test_contains() {
        let mut lazy =
            AncestorsIterator::new(SampleGraph, vec![10, 1], 0, true).unwrap();
        assert!(lazy.contains(1).unwrap());
        assert!(!lazy.contains(3).unwrap());

        let mut lazy =
            AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
        assert!(!lazy.contains(NULL_REVISION).unwrap());
    }

    #[test]
    fn test_peek() {
        let mut iter =
            AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
        // peek() gives us the next value
        assert_eq!(iter.peek(), Some(10));
        // but it's not been consumed
        assert_eq!(iter.next(), Some(Ok(10)));
        // and iteration resumes normally
        assert_eq!(iter.next(), Some(Ok(5)));

        // let's drain the iterator to test peek() at the end
        while iter.next().is_some() {}
        assert_eq!(iter.peek(), None);
    }

    #[test]
    fn test_empty() {
        let mut iter =
            AncestorsIterator::new(SampleGraph, vec![10], 0, true).unwrap();
        assert!(!iter.is_empty());
        while iter.next().is_some() {}
        assert!(!iter.is_empty());

        let iter =
            AncestorsIterator::new(SampleGraph, vec![], 0, true).unwrap();
        assert!(iter.is_empty());

        // case where iter.seen == {NULL_REVISION}
        let iter =
            AncestorsIterator::new(SampleGraph, vec![0], 0, false).unwrap();
        assert!(iter.is_empty());
    }

    /// A corrupted Graph, supporting error handling tests
    #[derive(Clone, Debug)]
    struct Corrupted;

    impl Graph for Corrupted {
        fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
            match rev {
                1 => Ok([0, -1]),
                r => Err(GraphError::ParentOutOfRange(r)),
            }
        }
    }

    #[test]
    fn test_initrev_out_of_range() {
        // inclusive=false looks up initrev's parents right away
        match AncestorsIterator::new(SampleGraph, vec![25], 0, false) {
            Ok(_) => panic!("Should have been ParentOutOfRange"),
            Err(e) => assert_eq!(e, GraphError::ParentOutOfRange(25)),
        }
    }

    #[test]
    fn test_next_out_of_range() {
        // inclusive=false looks up initrev's parents right away
        let mut iter =
            AncestorsIterator::new(Corrupted, vec![1], 0, false).unwrap();
        assert_eq!(iter.next(), Some(Err(GraphError::ParentOutOfRange(0))));
    }

    #[test]
    fn test_lazy_iter_contains() {
        let mut lazy =
            LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap();

        let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
        // compare with iterator tests on the same initial revisions
        assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);

        // contains() results are correct, unaffected by the fact that
        // we consumed entirely an iterator out of lazy
        assert_eq!(lazy.contains(2), Ok(true));
        assert_eq!(lazy.contains(9), Ok(false));
    }

    #[test]
    fn test_lazy_contains_iter() {
        let mut lazy =
            LazyAncestors::new(SampleGraph, vec![11, 13], 0, false).unwrap(); // reminder: [8, 7, 4, 3, 2, 1, 0]

        assert_eq!(lazy.contains(2), Ok(true));
        assert_eq!(lazy.contains(6), Ok(false));

        // after consumption of 2 by the inner iterator, results stay
        // consistent
        assert_eq!(lazy.contains(2), Ok(true));
        assert_eq!(lazy.contains(5), Ok(false));

        // iter() still gives us a fresh iterator
        let revs: Vec<Revision> = lazy.iter().map(|r| r.unwrap()).collect();
        assert_eq!(revs, vec![8, 7, 4, 3, 2, 1, 0]);
    }

    #[test]
    /// Test constructor, add/get bases and heads
    fn test_missing_bases() -> Result<(), GraphError> {
        let mut missing_ancestors =
            MissingAncestors::new(SampleGraph, [5, 3, 1, 3].iter().cloned());
        let mut as_vec: Vec<Revision> =
            missing_ancestors.get_bases().iter().cloned().collect();
        as_vec.sort();
        assert_eq!(as_vec, [1, 3, 5]);
        assert_eq!(missing_ancestors.max_base, 5);

        missing_ancestors.add_bases([3, 7, 8].iter().cloned());
        as_vec = missing_ancestors.get_bases().iter().cloned().collect();
        as_vec.sort();
        assert_eq!(as_vec, [1, 3, 5, 7, 8]);
        assert_eq!(missing_ancestors.max_base, 8);

        as_vec = missing_ancestors.bases_heads()?.iter().cloned().collect();
        as_vec.sort();
        assert_eq!(as_vec, [3, 5, 7, 8]);
        Ok(())
    }

    /// Helper asserting `remove_ancestors_from(revs)` leaves `expected`.
    fn assert_missing_remove(
        bases: &[Revision],
        revs: &[Revision],
        expected: &[Revision],
    ) {
        let mut missing_ancestors =
            MissingAncestors::new(SampleGraph, bases.iter().cloned());
        let mut revset: HashSet<Revision> = revs.iter().cloned().collect();
        missing_ancestors
            .remove_ancestors_from(&mut revset)
            .unwrap();
        let mut as_vec: Vec<Revision> = revset.into_iter().collect();
        as_vec.sort();
        assert_eq!(as_vec.as_slice(), expected);
    }

    #[test]
    fn test_missing_remove() {
        assert_missing_remove(
            &[1, 2, 3, 4, 7],
            Vec::from_iter(1..10).as_slice(),
            &[5, 6, 8, 9],
        );
        assert_missing_remove(&[10], &[11, 12, 13, 14], &[11, 12, 13, 14]);
        assert_missing_remove(&[7], &[1, 2, 3, 4, 5], &[3, 5]);
    }

    /// Helper asserting `missing_ancestors(revs)` returns `expected`.
    fn assert_missing_ancestors(
        bases: &[Revision],
        revs: &[Revision],
        expected: &[Revision],
    ) {
        let mut missing_ancestors =
            MissingAncestors::new(SampleGraph, bases.iter().cloned());
        let missing = missing_ancestors
            .missing_ancestors(revs.iter().cloned())
            .unwrap();
        assert_eq!(missing.as_slice(), expected);
    }

    #[test]
    fn test_missing_ancestors() {
        // examples taken from test-ancestors.py by having it run
        // on the same graph (both naive and fast Python algs)
        assert_missing_ancestors(&[10], &[11], &[3, 7, 11]);
        assert_missing_ancestors(&[11], &[10], &[5, 10]);
        assert_missing_ancestors(&[7], &[9, 11], &[3, 6, 9, 11]);
    }

    /// An interesting case found by a random generator similar to
    /// the one in test-ancestor.py. An early version of Rust MissingAncestors
    /// failed this, yet none of the integration tests of the whole suite
    /// caught it.
    #[test]
    fn test_remove_ancestors_from_case1() {
        let graph: VecGraph = vec![
            [NULL_REVISION, NULL_REVISION],
            [0, NULL_REVISION],
            [1, 0],
            [2, 1],
            [3, NULL_REVISION],
            [4, NULL_REVISION],
            [5, 1],
            [2, NULL_REVISION],
            [7, NULL_REVISION],
            [8, NULL_REVISION],
            [9, NULL_REVISION],
            [10, 1],
            [3, NULL_REVISION],
            [12, NULL_REVISION],
            [13, NULL_REVISION],
            [14, NULL_REVISION],
            [4, NULL_REVISION],
            [16, NULL_REVISION],
            [17, NULL_REVISION],
            [18, NULL_REVISION],
            [19, 11],
            [20, NULL_REVISION],
            [21, NULL_REVISION],
            [22, NULL_REVISION],
            [23, NULL_REVISION],
            [2, NULL_REVISION],
            [3, NULL_REVISION],
            [26, 24],
            [27, NULL_REVISION],
            [28, NULL_REVISION],
            [12, NULL_REVISION],
            [1, NULL_REVISION],
            [1, 9],
            [32, NULL_REVISION],
            [33, NULL_REVISION],
            [34, 31],
            [35, NULL_REVISION],
            [36, 26],
            [37, NULL_REVISION],
            [38, NULL_REVISION],
            [39, NULL_REVISION],
            [40, NULL_REVISION],
            [41, NULL_REVISION],
            [42, 26],
            [0, NULL_REVISION],
            [44, NULL_REVISION],
            [45, 4],
            [40, NULL_REVISION],
            [47, NULL_REVISION],
            [36, 0],
            [49, NULL_REVISION],
            [NULL_REVISION, NULL_REVISION],
            [51, NULL_REVISION],
            [52, NULL_REVISION],
            [53, NULL_REVISION],
            [14, NULL_REVISION],
            [55, NULL_REVISION],
            [15, NULL_REVISION],
            [23, NULL_REVISION],
            [58, NULL_REVISION],
            [59, NULL_REVISION],
            [2, NULL_REVISION],
            [61, 59],
            [62, NULL_REVISION],
            [63, NULL_REVISION],
            [NULL_REVISION, NULL_REVISION],
            [65, NULL_REVISION],
            [66, NULL_REVISION],
            [67, NULL_REVISION],
            [68, NULL_REVISION],
            [37, 28],
            [69, 25],
            [71, NULL_REVISION],
            [72, NULL_REVISION],
            [50, 2],
            [74, NULL_REVISION],
            [12, NULL_REVISION],
            [18, NULL_REVISION],
            [77, NULL_REVISION],
            [78, NULL_REVISION],
            [79, NULL_REVISION],
            [43, 33],
            [81, NULL_REVISION],
            [82, NULL_REVISION],
            [83, NULL_REVISION],
            [84, 45],
            [85, NULL_REVISION],
            [86, NULL_REVISION],
            [NULL_REVISION, NULL_REVISION],
            [88, NULL_REVISION],
            [NULL_REVISION, NULL_REVISION],
            [76, 83],
            [44, NULL_REVISION],
            [92, NULL_REVISION],
            [93, NULL_REVISION],
            [9, NULL_REVISION],
            [95, 67],
            [96, NULL_REVISION],
            [97, NULL_REVISION],
            [NULL_REVISION, NULL_REVISION],
        ];
        let problem_rev = 28 as Revision;
        let problem_base = 70 as Revision;
        // making the problem obvious: problem_rev is a parent of problem_base
        assert_eq!(graph.parents(problem_base).unwrap()[1], problem_rev);

        let mut missing_ancestors: MissingAncestors<VecGraph> =
            MissingAncestors::new(
                graph,
                [60, 26, 70, 3, 96, 19, 98, 49, 97, 47, 1, 6]
                    .iter()
                    .cloned(),
            );
        assert!(missing_ancestors.bases.contains(&problem_base));

        let mut revs: HashSet<Revision> =
            [4, 12, 41, 28, 68, 38, 1, 30, 56, 44]
                .iter()
                .cloned()
                .collect();
        missing_ancestors.remove_ancestors_from(&mut revs).unwrap();
        assert!(!revs.contains(&problem_rev));
    }
}
@@ -1,275 +1,276 b''
1 1 // dagops.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Miscellaneous DAG operations
9 9 //!
10 10 //! # Terminology
11 11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 12 //! mean those revisions that have no children among the collection.
13 13 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
14 14 //! whose parents, if any, don't belong to the collection.
15 15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 16 use crate::ancestors::AncestorsIterator;
17 17 use std::collections::{BTreeSet, HashSet};
18 18
19 fn remove_parents(
19 fn remove_parents<S: std::hash::BuildHasher>(
20 20 graph: &impl Graph,
21 21 rev: Revision,
22 set: &mut HashSet<Revision>,
22 set: &mut HashSet<Revision, S>,
23 23 ) -> Result<(), GraphError> {
24 24 for parent in graph.parents(rev)?.iter() {
25 25 if *parent != NULL_REVISION {
26 26 set.remove(parent);
27 27 }
28 28 }
29 29 Ok(())
30 30 }
31 31
32 32 /// Relative heads out of some revisions, passed as an iterator.
33 33 ///
34 34 /// These heads are defined as those revisions that have no children
35 35 /// among those emitted by the iterator.
36 36 ///
37 37 /// # Performance notes
38 38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 39 ///
40 40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 42 /// construct it comes from.
43 43 pub fn heads<'a>(
44 44 graph: &impl Graph,
45 45 iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 46 ) -> Result<HashSet<Revision>, GraphError> {
47 47 let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48 48 heads.remove(&NULL_REVISION);
49 49 for rev in iter_revs {
50 50 if *rev != NULL_REVISION {
51 51 remove_parents(graph, *rev, &mut heads)?;
52 52 }
53 53 }
54 54 Ok(heads)
55 55 }
56 56
57 57 /// Retain in `revs` only its relative heads.
58 58 ///
59 59 /// This is an in-place operation, so that control of the incoming
60 60 /// set is left to the caller.
61 61 /// - a direct Python binding would probably need to build its own `HashSet`
62 62 /// from an incoming iterable, even if its sole purpose is to extract the
63 63 /// heads.
64 64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 65 ///
66 66 /// # Performance notes
67 67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
68 pub fn retain_heads(
68 pub fn retain_heads<S: std::hash::BuildHasher>(
69 69 graph: &impl Graph,
70 revs: &mut HashSet<Revision>,
70 revs: &mut HashSet<Revision, S>,
71 71 ) -> Result<(), GraphError> {
72 72 revs.remove(&NULL_REVISION);
73 73 // we need to construct an iterable copy of revs to avoid itering while
74 74 // mutating
75 75 let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76 76 for rev in as_vec {
77 77 if rev != NULL_REVISION {
78 78 remove_parents(graph, rev, revs)?;
79 79 }
80 80 }
81 81 Ok(())
82 82 }
83 83
84 84 /// Roots of `revs`, passed as a `HashSet`
85 85 ///
86 86 /// They are returned in arbitrary order
87 pub fn roots<G: Graph>(
87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88 88 graph: &G,
89 revs: &HashSet<Revision>,
89 revs: &HashSet<Revision, S>,
90 90 ) -> Result<Vec<Revision>, GraphError> {
91 91 let mut roots: Vec<Revision> = Vec::new();
92 92 for rev in revs {
93 93 if graph
94 94 .parents(*rev)?
95 95 .iter()
96 96 .filter(|p| **p != NULL_REVISION)
97 97 .all(|p| !revs.contains(p))
98 98 {
99 99 roots.push(*rev);
100 100 }
101 101 }
102 102 Ok(roots)
103 103 }
104 104
105 105 /// Compute the topological range between two collections of revisions
106 106 ///
107 107 /// This is equivalent to the revset `<roots>::<heads>`.
108 108 ///
109 109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 110 /// actually cloning just a reference-counted Python pointer if
111 111 /// it's passed over through `rust-cpython`. This is due to the internal
112 112 /// use of `AncestorsIterator`
113 113 ///
114 114 /// # Algorithmic details
115 115 ///
116 116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 117 /// `mercurial.cext.parsers` does to obtain the same results.
118 118 ///
119 119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 120 /// them in the vector `heads_ancestors` vector, and adding any element of
121 121 /// `roots` we find among them to the resulting range.
122 122 /// - Then, we iterate on that recorded vector so that a revision is always
123 123 /// emitted after its parents and add all revisions whose parents are already
124 124 /// in the range to the results.
125 125 ///
126 126 /// # Performance notes
127 127 ///
128 128 /// The main difference with the C implementation is that
129 129 /// the latter uses a flat array with bit flags, instead of complex structures
130 130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 131 /// possible that the present implementation could be more memory efficient
132 132 /// for very large repositories with many branches.
133 133 pub fn range(
134 134 graph: &(impl Graph + Clone),
135 135 roots: impl IntoIterator<Item = Revision>,
136 136 heads: impl IntoIterator<Item = Revision>,
137 137 ) -> Result<BTreeSet<Revision>, GraphError> {
138 138 let mut range = BTreeSet::new();
139 139 let roots: HashSet<Revision> = roots.into_iter().collect();
140 140 let min_root: Revision = match roots.iter().cloned().min() {
141 141 None => {
142 142 return Ok(range);
143 143 }
144 144 Some(r) => r,
145 145 };
146 146
147 147 // Internally, AncestorsIterator currently maintains a `HashSet`
148 148 // of all seen revision, which is also what we record, albeit in an ordered
149 149 // way. There's room for improvement on this duplication.
150 150 let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151 151 let mut heads_ancestors: Vec<Revision> = Vec::new();
152 152 for revres in ait {
153 153 let rev = revres?;
154 154 if roots.contains(&rev) {
155 155 range.insert(rev);
156 156 }
157 157 heads_ancestors.push(rev);
158 158 }
159 159
160 160 for rev in heads_ancestors.into_iter().rev() {
161 161 for parent in graph.parents(rev)?.iter() {
162 162 if *parent != NULL_REVISION && range.contains(parent) {
163 163 range.insert(rev);
164 164 }
165 165 }
166 166 }
167 167 Ok(range)
168 168 }
169 169
170 170 #[cfg(test)]
171 171 mod tests {
172 172
173 173 use super::*;
174 174 use crate::testing::SampleGraph;
175 175
176 176 /// Apply `retain_heads()` to the given slice and return as a sorted `Vec`
177 177 fn retain_heads_sorted(
178 178 graph: &impl Graph,
179 179 revs: &[Revision],
180 180 ) -> Result<Vec<Revision>, GraphError> {
181 181 let mut revs: HashSet<Revision> = revs.iter().cloned().collect();
182 182 retain_heads(graph, &mut revs)?;
183 183 let mut as_vec: Vec<Revision> = revs.iter().cloned().collect();
184 184 as_vec.sort();
185 185 Ok(as_vec)
186 186 }
187 187
188 188 #[test]
189 189 fn test_retain_heads() -> Result<(), GraphError> {
190 190 assert_eq!(retain_heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
191 191 assert_eq!(
192 192 retain_heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
193 193 vec![1, 6, 12]
194 194 );
195 195 assert_eq!(
196 196 retain_heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
197 197 vec![3, 5, 8, 9]
198 198 );
199 199 Ok(())
200 200 }
201 201
202 202 /// Apply `heads()` to the given slice and return as a sorted `Vec`
203 203 fn heads_sorted(
204 204 graph: &impl Graph,
205 205 revs: &[Revision],
206 206 ) -> Result<Vec<Revision>, GraphError> {
207 207 let heads = heads(graph, revs.iter())?;
208 208 let mut as_vec: Vec<Revision> = heads.iter().cloned().collect();
209 209 as_vec.sort();
210 210 Ok(as_vec)
211 211 }
212 212
213 213 #[test]
214 214 fn test_heads() -> Result<(), GraphError> {
215 215 assert_eq!(heads_sorted(&SampleGraph, &[4, 5, 6])?, vec![5, 6]);
216 216 assert_eq!(
217 217 heads_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
218 218 vec![1, 6, 12]
219 219 );
220 220 assert_eq!(
221 221 heads_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
222 222 vec![3, 5, 8, 9]
223 223 );
224 224 Ok(())
225 225 }
226 226
227 227 /// Apply `roots()` and sort the result for easier comparison
228 228 fn roots_sorted(
229 229 graph: &impl Graph,
230 230 revs: &[Revision],
231 231 ) -> Result<Vec<Revision>, GraphError> {
232 let mut as_vec = roots(graph, &revs.iter().cloned().collect())?;
232 let set: HashSet<_> = revs.iter().cloned().collect();
233 let mut as_vec = roots(graph, &set)?;
233 234 as_vec.sort();
234 235 Ok(as_vec)
235 236 }
236 237
237 238 #[test]
238 239 fn test_roots() -> Result<(), GraphError> {
239 240 assert_eq!(roots_sorted(&SampleGraph, &[4, 5, 6])?, vec![4]);
240 241 assert_eq!(
241 242 roots_sorted(&SampleGraph, &[4, 1, 6, 12, 0])?,
242 243 vec![0, 4, 12]
243 244 );
244 245 assert_eq!(
245 246 roots_sorted(&SampleGraph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
246 247 vec![1, 8]
247 248 );
248 249 Ok(())
249 250 }
250 251
251 252 /// Apply `range()` and convert the result into a Vec for easier comparison
252 253 fn range_vec(
253 254 graph: impl Graph + Clone,
254 255 roots: &[Revision],
255 256 heads: &[Revision],
256 257 ) -> Result<Vec<Revision>, GraphError> {
257 258 range(&graph, roots.iter().cloned(), heads.iter().cloned())
258 259 .map(|bs| bs.into_iter().collect())
259 260 }
260 261
261 262 #[test]
262 263 fn test_range() -> Result<(), GraphError> {
263 264 assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
264 265 assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
265 266 assert_eq!(
266 267 range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
267 268 vec![5, 10]
268 269 );
269 270 assert_eq!(
270 271 range_vec(SampleGraph, &[5, 6], &[10, 12])?,
271 272 vec![5, 6, 9, 10, 12]
272 273 );
273 274 Ok(())
274 275 }
275 276 }
@@ -1,418 +1,422 b''
1 1 // dirs_multiset.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! A multiset of directory names.
9 9 //!
 10 10 //! Used to count the references to directories in a manifest or dirstate.
11 11 use crate::{
12 12 dirstate::EntryState,
13 13 utils::{
14 14 files,
15 15 hg_path::{HgPath, HgPathBuf, HgPathError},
16 16 },
17 17 DirstateEntry, DirstateMapError, FastHashMap,
18 18 };
19 19 use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
20 20
21 21 // could be encapsulated if we care API stability more seriously
22 22 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
23 23
24 24 #[derive(PartialEq, Debug)]
25 25 pub struct DirsMultiset {
26 26 inner: FastHashMap<HgPathBuf, u32>,
27 27 }
28 28
29 29 impl DirsMultiset {
30 30 /// Initializes the multiset from a dirstate.
31 31 ///
32 32 /// If `skip_state` is provided, skips dirstate entries with equal state.
33 33 pub fn from_dirstate(
34 34 dirstate: &FastHashMap<HgPathBuf, DirstateEntry>,
35 35 skip_state: Option<EntryState>,
36 36 ) -> Result<Self, DirstateMapError> {
37 37 let mut multiset = DirsMultiset {
38 38 inner: FastHashMap::default(),
39 39 };
40 40
41 41 for (filename, DirstateEntry { state, .. }) in dirstate {
42 42 // This `if` is optimized out of the loop
43 43 if let Some(skip) = skip_state {
44 44 if skip != *state {
45 45 multiset.add_path(filename)?;
46 46 }
47 47 } else {
48 48 multiset.add_path(filename)?;
49 49 }
50 50 }
51 51
52 52 Ok(multiset)
53 53 }
54 54
55 55 /// Initializes the multiset from a manifest.
56 56 pub fn from_manifest(
57 57 manifest: &[impl AsRef<HgPath>],
58 58 ) -> Result<Self, DirstateMapError> {
59 59 let mut multiset = DirsMultiset {
60 60 inner: FastHashMap::default(),
61 61 };
62 62
63 63 for filename in manifest {
64 64 multiset.add_path(filename.as_ref())?;
65 65 }
66 66
67 67 Ok(multiset)
68 68 }
69 69
70 70 /// Increases the count of deepest directory contained in the path.
71 71 ///
72 72 /// If the directory is not yet in the map, adds its parents.
73 73 pub fn add_path(
74 74 &mut self,
75 75 path: impl AsRef<HgPath>,
76 76 ) -> Result<(), DirstateMapError> {
77 77 for subpath in files::find_dirs(path.as_ref()) {
78 78 if subpath.as_bytes().last() == Some(&b'/') {
79 79 // TODO Remove this once PathAuditor is certified
80 80 // as the only entrypoint for path data
81 81 let second_slash_index = subpath.len() - 1;
82 82
83 83 return Err(DirstateMapError::InvalidPath(
84 84 HgPathError::ConsecutiveSlashes {
85 85 bytes: path.as_ref().as_bytes().to_owned(),
86 86 second_slash_index,
87 87 },
88 88 ));
89 89 }
90 90 if let Some(val) = self.inner.get_mut(subpath) {
91 91 *val += 1;
92 92 break;
93 93 }
94 94 self.inner.insert(subpath.to_owned(), 1);
95 95 }
96 96 Ok(())
97 97 }
98 98
99 99 /// Decreases the count of deepest directory contained in the path.
100 100 ///
101 101 /// If it is the only reference, decreases all parents until one is
102 102 /// removed.
103 103 /// If the directory is not in the map, something horrible has happened.
104 104 pub fn delete_path(
105 105 &mut self,
106 106 path: impl AsRef<HgPath>,
107 107 ) -> Result<(), DirstateMapError> {
108 108 for subpath in files::find_dirs(path.as_ref()) {
109 109 match self.inner.entry(subpath.to_owned()) {
110 110 Entry::Occupied(mut entry) => {
111 let val = entry.get().clone();
111 let val = *entry.get();
112 112 if val > 1 {
113 113 entry.insert(val - 1);
114 114 break;
115 115 }
116 116 entry.remove();
117 117 }
118 118 Entry::Vacant(_) => {
119 119 return Err(DirstateMapError::PathNotFound(
120 120 path.as_ref().to_owned(),
121 121 ))
122 122 }
123 123 };
124 124 }
125 125
126 126 Ok(())
127 127 }
128 128
129 129 pub fn contains(&self, key: impl AsRef<HgPath>) -> bool {
130 130 self.inner.contains_key(key.as_ref())
131 131 }
132 132
133 133 pub fn iter(&self) -> DirsMultisetIter {
134 134 self.inner.keys()
135 135 }
136 136
137 137 pub fn len(&self) -> usize {
138 138 self.inner.len()
139 139 }
140
141 pub fn is_empty(&self) -> bool {
142 self.len() == 0
143 }
140 144 }
141 145
142 146 /// This is basically a reimplementation of `DirsMultiset` that stores the
143 147 /// children instead of just a count of them, plus a small optional
144 148 /// optimization to avoid some directories we don't need.
145 149 #[derive(PartialEq, Debug)]
146 150 pub struct DirsChildrenMultiset<'a> {
147 151 inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
148 152 only_include: Option<HashSet<&'a HgPath>>,
149 153 }
150 154
151 155 impl<'a> DirsChildrenMultiset<'a> {
152 156 pub fn new(
153 157 paths: impl Iterator<Item = &'a HgPathBuf>,
154 158 only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
155 159 ) -> Self {
156 160 let mut new = Self {
157 161 inner: HashMap::default(),
158 162 only_include: only_include
159 .map(|s| s.iter().map(|p| p.as_ref()).collect()),
163 .map(|s| s.iter().map(AsRef::as_ref).collect()),
160 164 };
161 165
162 166 for path in paths {
163 167 new.add_path(path)
164 168 }
165 169
166 170 new
167 171 }
168 172 fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
169 173 if path.as_ref().is_empty() {
170 174 return;
171 175 }
172 176 for (directory, basename) in files::find_dirs_with_base(path.as_ref())
173 177 {
174 178 if !self.is_dir_included(directory) {
175 179 continue;
176 180 }
177 181 self.inner
178 182 .entry(directory)
179 183 .and_modify(|e| {
180 184 e.insert(basename);
181 185 })
182 186 .or_insert_with(|| {
183 187 let mut set = HashSet::new();
184 188 set.insert(basename);
185 189 set
186 190 });
187 191 }
188 192 }
189 193 fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
190 194 match &self.only_include {
191 195 None => false,
192 196 Some(i) => i.contains(dir.as_ref()),
193 197 }
194 198 }
195 199
196 200 pub fn get(
197 201 &self,
198 202 path: impl AsRef<HgPath>,
199 203 ) -> Option<&HashSet<&'a HgPath>> {
200 204 self.inner.get(path.as_ref())
201 205 }
202 206 }
203 207
204 208 #[cfg(test)]
205 209 mod tests {
206 210 use super::*;
207 211
208 212 #[test]
209 213 fn test_delete_path_path_not_found() {
210 214 let manifest: Vec<HgPathBuf> = vec![];
211 215 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
212 216 let path = HgPathBuf::from_bytes(b"doesnotexist/");
213 217 assert_eq!(
214 218 Err(DirstateMapError::PathNotFound(path.to_owned())),
215 219 map.delete_path(&path)
216 220 );
217 221 }
218 222
219 223 #[test]
220 224 fn test_delete_path_empty_path() {
221 225 let mut map =
222 226 DirsMultiset::from_manifest(&vec![HgPathBuf::new()]).unwrap();
223 227 let path = HgPath::new(b"");
224 228 assert_eq!(Ok(()), map.delete_path(path));
225 229 assert_eq!(
226 230 Err(DirstateMapError::PathNotFound(path.to_owned())),
227 231 map.delete_path(path)
228 232 );
229 233 }
230 234
231 235 #[test]
232 236 fn test_delete_path_successful() {
233 237 let mut map = DirsMultiset {
234 238 inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
235 239 .iter()
236 240 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
237 241 .collect(),
238 242 };
239 243
240 244 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
241 245 eprintln!("{:?}", map);
242 246 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/b/")));
243 247 eprintln!("{:?}", map);
244 248 assert_eq!(
245 249 Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
246 250 b"a/b/"
247 251 ))),
248 252 map.delete_path(HgPath::new(b"a/b/"))
249 253 );
250 254
251 255 assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
252 256 assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
253 257 eprintln!("{:?}", map);
254 258 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/")));
255 259 eprintln!("{:?}", map);
256 260
257 261 assert_eq!(Ok(()), map.delete_path(HgPath::new(b"a/c/")));
258 262 assert_eq!(
259 263 Err(DirstateMapError::PathNotFound(HgPathBuf::from_bytes(
260 264 b"a/c/"
261 265 ))),
262 266 map.delete_path(HgPath::new(b"a/c/"))
263 267 );
264 268 }
265 269
266 270 #[test]
267 271 fn test_add_path_empty_path() {
268 272 let manifest: Vec<HgPathBuf> = vec![];
269 273 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
270 274 let path = HgPath::new(b"");
271 275 map.add_path(path).unwrap();
272 276
273 277 assert_eq!(1, map.len());
274 278 }
275 279
276 280 #[test]
277 281 fn test_add_path_successful() {
278 282 let manifest: Vec<HgPathBuf> = vec![];
279 283 let mut map = DirsMultiset::from_manifest(&manifest).unwrap();
280 284
281 285 map.add_path(HgPath::new(b"a/")).unwrap();
282 286 assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
283 287 assert_eq!(1, *map.inner.get(HgPath::new(b"")).unwrap());
284 288 assert_eq!(2, map.len());
285 289
286 290 // Non directory should be ignored
287 291 map.add_path(HgPath::new(b"a")).unwrap();
288 292 assert_eq!(1, *map.inner.get(HgPath::new(b"a")).unwrap());
289 293 assert_eq!(2, map.len());
290 294
291 295 // Non directory will still add its base
292 296 map.add_path(HgPath::new(b"a/b")).unwrap();
293 297 assert_eq!(2, *map.inner.get(HgPath::new(b"a")).unwrap());
294 298 assert_eq!(2, map.len());
295 299
296 300 // Duplicate path works
297 301 map.add_path(HgPath::new(b"a/")).unwrap();
298 302 assert_eq!(3, *map.inner.get(HgPath::new(b"a")).unwrap());
299 303
300 304 // Nested dir adds to its base
301 305 map.add_path(HgPath::new(b"a/b/")).unwrap();
302 306 assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
303 307 assert_eq!(1, *map.inner.get(HgPath::new(b"a/b")).unwrap());
304 308
305 309 // but not its base's base, because it already existed
306 310 map.add_path(HgPath::new(b"a/b/c/")).unwrap();
307 311 assert_eq!(4, *map.inner.get(HgPath::new(b"a")).unwrap());
308 312 assert_eq!(2, *map.inner.get(HgPath::new(b"a/b")).unwrap());
309 313
310 314 map.add_path(HgPath::new(b"a/c/")).unwrap();
311 315 assert_eq!(1, *map.inner.get(HgPath::new(b"a/c")).unwrap());
312 316
313 317 let expected = DirsMultiset {
314 318 inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
315 319 .iter()
316 320 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
317 321 .collect(),
318 322 };
319 323 assert_eq!(map, expected);
320 324 }
321 325
322 326 #[test]
323 327 fn test_dirsmultiset_new_empty() {
324 328 let manifest: Vec<HgPathBuf> = vec![];
325 329 let new = DirsMultiset::from_manifest(&manifest).unwrap();
326 330 let expected = DirsMultiset {
327 331 inner: FastHashMap::default(),
328 332 };
329 333 assert_eq!(expected, new);
330 334
331 335 let new = DirsMultiset::from_dirstate(&FastHashMap::default(), None)
332 336 .unwrap();
333 337 let expected = DirsMultiset {
334 338 inner: FastHashMap::default(),
335 339 };
336 340 assert_eq!(expected, new);
337 341 }
338 342
339 343 #[test]
340 344 fn test_dirsmultiset_new_no_skip() {
341 345 let input_vec: Vec<HgPathBuf> = ["a/", "b/", "a/c", "a/d/"]
342 346 .iter()
343 347 .map(|e| HgPathBuf::from_bytes(e.as_bytes()))
344 348 .collect();
345 349 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
346 350 .iter()
347 351 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
348 352 .collect();
349 353
350 354 let new = DirsMultiset::from_manifest(&input_vec).unwrap();
351 355 let expected = DirsMultiset {
352 356 inner: expected_inner,
353 357 };
354 358 assert_eq!(expected, new);
355 359
356 360 let input_map = ["a/", "b/", "a/c", "a/d/"]
357 361 .iter()
358 362 .map(|f| {
359 363 (
360 364 HgPathBuf::from_bytes(f.as_bytes()),
361 365 DirstateEntry {
362 366 state: EntryState::Normal,
363 367 mode: 0,
364 368 mtime: 0,
365 369 size: 0,
366 370 },
367 371 )
368 372 })
369 373 .collect();
370 374 let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
371 375 .iter()
372 376 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
373 377 .collect();
374 378
375 379 let new = DirsMultiset::from_dirstate(&input_map, None).unwrap();
376 380 let expected = DirsMultiset {
377 381 inner: expected_inner,
378 382 };
379 383 assert_eq!(expected, new);
380 384 }
381 385
382 386 #[test]
383 387 fn test_dirsmultiset_new_skip() {
384 388 let input_map = [
385 389 ("a/", EntryState::Normal),
386 390 ("a/b/", EntryState::Normal),
387 391 ("a/c", EntryState::Removed),
388 392 ("a/d/", EntryState::Merged),
389 393 ]
390 394 .iter()
391 395 .map(|(f, state)| {
392 396 (
393 397 HgPathBuf::from_bytes(f.as_bytes()),
394 398 DirstateEntry {
395 399 state: *state,
396 400 mode: 0,
397 401 mtime: 0,
398 402 size: 0,
399 403 },
400 404 )
401 405 })
402 406 .collect();
403 407
404 408 // "a" incremented with "a/c" and "a/d/"
405 409 let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
406 410 .iter()
407 411 .map(|(k, v)| (HgPathBuf::from_bytes(k.as_bytes()), *v))
408 412 .collect();
409 413
410 414 let new =
411 415 DirsMultiset::from_dirstate(&input_map, Some(EntryState::Normal))
412 416 .unwrap();
413 417 let expected = DirsMultiset {
414 418 inner: expected_inner,
415 419 };
416 420 assert_eq!(expected, new);
417 421 }
418 422 }
@@ -1,497 +1,497 b''
1 1 // dirstate_map.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use crate::{
9 9 dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT},
10 10 pack_dirstate, parse_dirstate,
11 11 utils::{
12 12 files::normalize_case,
13 13 hg_path::{HgPath, HgPathBuf},
14 14 },
15 15 CopyMap, DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
16 16 DirstateParents, DirstateParseError, FastHashMap, StateMap,
17 17 };
18 18 use core::borrow::Borrow;
19 19 use std::collections::HashSet;
20 20 use std::convert::TryInto;
21 21 use std::iter::FromIterator;
22 22 use std::ops::Deref;
23 23 use std::time::Duration;
24 24
25 25 pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>;
26 26
27 27 const NULL_ID: [u8; 20] = [0; 20];
28 28 const MTIME_UNSET: i32 = -1;
29 29
30 30 #[derive(Default)]
31 31 pub struct DirstateMap {
32 32 state_map: StateMap,
33 33 pub copy_map: CopyMap,
34 34 file_fold_map: Option<FileFoldMap>,
35 35 pub dirs: Option<DirsMultiset>,
36 36 pub all_dirs: Option<DirsMultiset>,
37 37 non_normal_set: Option<HashSet<HgPathBuf>>,
38 38 other_parent_set: Option<HashSet<HgPathBuf>>,
39 39 parents: Option<DirstateParents>,
40 40 dirty_parents: bool,
41 41 }
42 42
43 43 /// Should only really be used in python interface code, for clarity
44 44 impl Deref for DirstateMap {
45 45 type Target = StateMap;
46 46
47 47 fn deref(&self) -> &Self::Target {
48 48 &self.state_map
49 49 }
50 50 }
51 51
52 52 impl FromIterator<(HgPathBuf, DirstateEntry)> for DirstateMap {
53 53 fn from_iter<I: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(
54 54 iter: I,
55 55 ) -> Self {
56 56 Self {
57 57 state_map: iter.into_iter().collect(),
58 58 ..Self::default()
59 59 }
60 60 }
61 61 }
62 62
63 63 impl DirstateMap {
64 64 pub fn new() -> Self {
65 65 Self::default()
66 66 }
67 67
68 68 pub fn clear(&mut self) {
69 69 self.state_map.clear();
70 70 self.copy_map.clear();
71 71 self.file_fold_map = None;
72 72 self.non_normal_set = None;
73 73 self.other_parent_set = None;
74 74 self.set_parents(&DirstateParents {
75 75 p1: NULL_ID,
76 76 p2: NULL_ID,
77 77 })
78 78 }
79 79
80 80 /// Add a tracked file to the dirstate
81 81 pub fn add_file(
82 82 &mut self,
83 83 filename: &HgPath,
84 84 old_state: EntryState,
85 85 entry: DirstateEntry,
86 86 ) -> Result<(), DirstateMapError> {
87 87 if old_state == EntryState::Unknown || old_state == EntryState::Removed
88 88 {
89 89 if let Some(ref mut dirs) = self.dirs {
90 90 dirs.add_path(filename)?;
91 91 }
92 92 }
93 93 if old_state == EntryState::Unknown {
94 94 if let Some(ref mut all_dirs) = self.all_dirs {
95 95 all_dirs.add_path(filename)?;
96 96 }
97 97 }
98 98 self.state_map.insert(filename.to_owned(), entry.to_owned());
99 99
100 100 if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
101 101 self.get_non_normal_other_parent_entries()
102 102 .0
103 103 .insert(filename.to_owned());
104 104 }
105 105
106 106 if entry.size == SIZE_FROM_OTHER_PARENT {
107 107 self.get_non_normal_other_parent_entries()
108 108 .1
109 109 .insert(filename.to_owned());
110 110 }
111 111 Ok(())
112 112 }
113 113
114 114 /// Mark a file as removed in the dirstate.
115 115 ///
116 116 /// The `size` parameter is used to store sentinel values that indicate
117 117 /// the file's previous state. In the future, we should refactor this
118 118 /// to be more explicit about what that state is.
119 119 pub fn remove_file(
120 120 &mut self,
121 121 filename: &HgPath,
122 122 old_state: EntryState,
123 123 size: i32,
124 124 ) -> Result<(), DirstateMapError> {
125 125 if old_state != EntryState::Unknown && old_state != EntryState::Removed
126 126 {
127 127 if let Some(ref mut dirs) = self.dirs {
128 128 dirs.delete_path(filename)?;
129 129 }
130 130 }
131 131 if old_state == EntryState::Unknown {
132 132 if let Some(ref mut all_dirs) = self.all_dirs {
133 133 all_dirs.add_path(filename)?;
134 134 }
135 135 }
136 136
137 137 if let Some(ref mut file_fold_map) = self.file_fold_map {
138 138 file_fold_map.remove(&normalize_case(filename));
139 139 }
140 140 self.state_map.insert(
141 141 filename.to_owned(),
142 142 DirstateEntry {
143 143 state: EntryState::Removed,
144 144 mode: 0,
145 145 size,
146 146 mtime: 0,
147 147 },
148 148 );
149 149 self.get_non_normal_other_parent_entries()
150 150 .0
151 151 .insert(filename.to_owned());
152 152 Ok(())
153 153 }
154 154
155 155 /// Remove a file from the dirstate.
156 156 /// Returns `true` if the file was previously recorded.
157 157 pub fn drop_file(
158 158 &mut self,
159 159 filename: &HgPath,
160 160 old_state: EntryState,
161 161 ) -> Result<bool, DirstateMapError> {
162 162 let exists = self.state_map.remove(filename).is_some();
163 163
164 164 if exists {
165 165 if old_state != EntryState::Removed {
166 166 if let Some(ref mut dirs) = self.dirs {
167 167 dirs.delete_path(filename)?;
168 168 }
169 169 }
170 170 if let Some(ref mut all_dirs) = self.all_dirs {
171 171 all_dirs.delete_path(filename)?;
172 172 }
173 173 }
174 174 if let Some(ref mut file_fold_map) = self.file_fold_map {
175 175 file_fold_map.remove(&normalize_case(filename));
176 176 }
177 177 self.get_non_normal_other_parent_entries()
178 178 .0
179 179 .remove(filename);
180 180
181 181 Ok(exists)
182 182 }
183 183
184 184 pub fn clear_ambiguous_times(
185 185 &mut self,
186 186 filenames: Vec<HgPathBuf>,
187 187 now: i32,
188 188 ) {
189 189 for filename in filenames {
190 190 let mut changed = false;
191 191 self.state_map
192 192 .entry(filename.to_owned())
193 193 .and_modify(|entry| {
194 194 if entry.state == EntryState::Normal && entry.mtime == now
195 195 {
196 196 changed = true;
197 197 *entry = DirstateEntry {
198 198 mtime: MTIME_UNSET,
199 199 ..*entry
200 200 };
201 201 }
202 202 });
203 203 if changed {
204 204 self.get_non_normal_other_parent_entries()
205 205 .0
206 206 .insert(filename.to_owned());
207 207 }
208 208 }
209 209 }
210 210
211 211 pub fn non_normal_entries_remove(
212 212 &mut self,
213 213 key: impl AsRef<HgPath>,
214 214 ) -> bool {
215 215 self.get_non_normal_other_parent_entries()
216 216 .0
217 217 .remove(key.as_ref())
218 218 }
219 219 pub fn non_normal_entries_union(
220 220 &mut self,
221 221 other: HashSet<HgPathBuf>,
222 222 ) -> Vec<HgPathBuf> {
223 223 self.get_non_normal_other_parent_entries()
224 224 .0
225 225 .union(&other)
226 .map(|e| e.to_owned())
226 .map(ToOwned::to_owned)
227 227 .collect()
228 228 }
229 229
230 230 pub fn get_non_normal_other_parent_entries(
231 231 &mut self,
232 232 ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) {
233 233 self.set_non_normal_other_parent_entries(false);
234 234 (
235 235 self.non_normal_set.as_mut().unwrap(),
236 236 self.other_parent_set.as_mut().unwrap(),
237 237 )
238 238 }
239 239
240 240 /// Useful to get immutable references to those sets in contexts where
241 241 /// you only have an immutable reference to the `DirstateMap`, like when
242 242 /// sharing references with Python.
243 243 ///
244 244 /// TODO, get rid of this along with the other "setter/getter" stuff when
245 245 /// a nice typestate plan is defined.
246 246 ///
247 247 /// # Panics
248 248 ///
249 249 /// Will panic if either set is `None`.
250 250 pub fn get_non_normal_other_parent_entries_panic(
251 251 &self,
252 252 ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) {
253 253 (
254 254 self.non_normal_set.as_ref().unwrap(),
255 255 self.other_parent_set.as_ref().unwrap(),
256 256 )
257 257 }
258 258
259 259 pub fn set_non_normal_other_parent_entries(&mut self, force: bool) {
260 260 if !force
261 261 && self.non_normal_set.is_some()
262 262 && self.other_parent_set.is_some()
263 263 {
264 264 return;
265 265 }
266 266 let mut non_normal = HashSet::new();
267 267 let mut other_parent = HashSet::new();
268 268
269 269 for (
270 270 filename,
271 271 DirstateEntry {
272 272 state, size, mtime, ..
273 273 },
274 274 ) in self.state_map.iter()
275 275 {
276 276 if *state != EntryState::Normal || *mtime == MTIME_UNSET {
277 277 non_normal.insert(filename.to_owned());
278 278 }
279 279 if *state == EntryState::Normal && *size == SIZE_FROM_OTHER_PARENT
280 280 {
281 281 other_parent.insert(filename.to_owned());
282 282 }
283 283 }
284 284 self.non_normal_set = Some(non_normal);
285 285 self.other_parent_set = Some(other_parent);
286 286 }
287 287
288 288 /// Both of these setters and their uses appear to be the simplest way to
289 289 /// emulate a Python lazy property, but it is ugly and unidiomatic.
290 290 /// TODO One day, rewriting this struct using the typestate might be a
291 291 /// good idea.
292 292 pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
293 293 if self.all_dirs.is_none() {
294 294 self.all_dirs =
295 295 Some(DirsMultiset::from_dirstate(&self.state_map, None)?);
296 296 }
297 297 Ok(())
298 298 }
299 299
300 300 pub fn set_dirs(&mut self) -> Result<(), DirstateMapError> {
301 301 if self.dirs.is_none() {
302 302 self.dirs = Some(DirsMultiset::from_dirstate(
303 303 &self.state_map,
304 304 Some(EntryState::Removed),
305 305 )?);
306 306 }
307 307 Ok(())
308 308 }
309 309
310 310 pub fn has_tracked_dir(
311 311 &mut self,
312 312 directory: &HgPath,
313 313 ) -> Result<bool, DirstateMapError> {
314 314 self.set_dirs()?;
315 315 Ok(self.dirs.as_ref().unwrap().contains(directory))
316 316 }
317 317
318 318 pub fn has_dir(
319 319 &mut self,
320 320 directory: &HgPath,
321 321 ) -> Result<bool, DirstateMapError> {
322 322 self.set_all_dirs()?;
323 323 Ok(self.all_dirs.as_ref().unwrap().contains(directory))
324 324 }
325 325
326 326 pub fn parents(
327 327 &mut self,
328 328 file_contents: &[u8],
329 329 ) -> Result<&DirstateParents, DirstateError> {
330 330 if let Some(ref parents) = self.parents {
331 331 return Ok(parents);
332 332 }
333 333 let parents;
334 334 if file_contents.len() == PARENT_SIZE * 2 {
335 335 parents = DirstateParents {
336 336 p1: file_contents[..PARENT_SIZE].try_into().unwrap(),
337 337 p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2]
338 338 .try_into()
339 339 .unwrap(),
340 340 };
341 341 } else if file_contents.is_empty() {
342 342 parents = DirstateParents {
343 343 p1: NULL_ID,
344 344 p2: NULL_ID,
345 345 };
346 346 } else {
347 347 return Err(DirstateError::Parse(DirstateParseError::Damaged));
348 348 }
349 349
350 350 self.parents = Some(parents);
351 351 Ok(self.parents.as_ref().unwrap())
352 352 }
353 353
354 354 pub fn set_parents(&mut self, parents: &DirstateParents) {
355 355 self.parents = Some(parents.clone());
356 356 self.dirty_parents = true;
357 357 }
358 358
359 359 pub fn read(
360 360 &mut self,
361 361 file_contents: &[u8],
362 362 ) -> Result<Option<DirstateParents>, DirstateError> {
363 363 if file_contents.is_empty() {
364 364 return Ok(None);
365 365 }
366 366
367 367 let parents = parse_dirstate(
368 368 &mut self.state_map,
369 369 &mut self.copy_map,
370 370 file_contents,
371 371 )?;
372 372
373 373 if !self.dirty_parents {
374 374 self.set_parents(&parents);
375 375 }
376 376
377 377 Ok(Some(parents))
378 378 }
379 379
380 380 pub fn pack(
381 381 &mut self,
382 382 parents: DirstateParents,
383 383 now: Duration,
384 384 ) -> Result<Vec<u8>, DirstateError> {
385 385 let packed =
386 386 pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
387 387
388 388 self.dirty_parents = false;
389 389
390 390 self.set_non_normal_other_parent_entries(true);
391 391 Ok(packed)
392 392 }
393 393
394 394 pub fn build_file_fold_map(&mut self) -> &FileFoldMap {
395 395 if let Some(ref file_fold_map) = self.file_fold_map {
396 396 return file_fold_map;
397 397 }
398 398 let mut new_file_fold_map = FileFoldMap::default();
399 399 for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
400 400 {
401 401 if *state == EntryState::Removed {
402 402 new_file_fold_map
403 403 .insert(normalize_case(filename), filename.to_owned());
404 404 }
405 405 }
406 406 self.file_fold_map = Some(new_file_fold_map);
407 407 self.file_fold_map.as_ref().unwrap()
408 408 }
409 409 }
410 410
411 411 #[cfg(test)]
412 412 mod tests {
413 413 use super::*;
414 414
415 415 #[test]
416 416 fn test_dirs_multiset() {
417 417 let mut map = DirstateMap::new();
418 418 assert!(map.dirs.is_none());
419 419 assert!(map.all_dirs.is_none());
420 420
421 421 assert_eq!(map.has_dir(HgPath::new(b"nope")).unwrap(), false);
422 422 assert!(map.all_dirs.is_some());
423 423 assert!(map.dirs.is_none());
424 424
425 425 assert_eq!(map.has_tracked_dir(HgPath::new(b"nope")).unwrap(), false);
426 426 assert!(map.dirs.is_some());
427 427 }
428 428
429 429 #[test]
430 430 fn test_add_file() {
431 431 let mut map = DirstateMap::new();
432 432
433 433 assert_eq!(0, map.len());
434 434
435 435 map.add_file(
436 436 HgPath::new(b"meh"),
437 437 EntryState::Normal,
438 438 DirstateEntry {
439 439 state: EntryState::Normal,
440 440 mode: 1337,
441 441 mtime: 1337,
442 442 size: 1337,
443 443 },
444 444 )
445 445 .unwrap();
446 446
447 447 assert_eq!(1, map.len());
448 448 assert_eq!(0, map.get_non_normal_other_parent_entries().0.len());
449 449 assert_eq!(0, map.get_non_normal_other_parent_entries().1.len());
450 450 }
451 451
452 452 #[test]
453 453 fn test_non_normal_other_parent_entries() {
454 454 let mut map: DirstateMap = [
455 455 (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
456 456 (b"f2", (EntryState::Normal, 1337, 1337, -1)),
457 457 (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
458 458 (b"f4", (EntryState::Normal, 1337, -2, 1337)),
459 459 (b"f5", (EntryState::Added, 1337, 1337, 1337)),
460 460 (b"f6", (EntryState::Added, 1337, 1337, -1)),
461 461 (b"f7", (EntryState::Merged, 1337, 1337, -1)),
462 462 (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
463 463 (b"f9", (EntryState::Merged, 1337, -2, 1337)),
464 464 (b"fa", (EntryState::Added, 1337, -2, 1337)),
465 465 (b"fb", (EntryState::Removed, 1337, -2, 1337)),
466 466 ]
467 467 .iter()
468 468 .map(|(fname, (state, mode, size, mtime))| {
469 469 (
470 470 HgPathBuf::from_bytes(fname.as_ref()),
471 471 DirstateEntry {
472 472 state: *state,
473 473 mode: *mode,
474 474 size: *size,
475 475 mtime: *mtime,
476 476 },
477 477 )
478 478 })
479 479 .collect();
480 480
481 481 let mut non_normal = [
482 482 b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
483 483 ]
484 484 .iter()
485 485 .map(|x| HgPathBuf::from_bytes(x.as_ref()))
486 486 .collect();
487 487
488 488 let mut other_parent = HashSet::new();
489 489 other_parent.insert(HgPathBuf::from_bytes(b"f4"));
490 490 let entries = map.get_non_normal_other_parent_entries();
491 491
492 492 assert_eq!(
493 493 (&mut non_normal, &mut other_parent),
494 494 (entries.0, entries.1)
495 495 );
496 496 }
497 497 }
@@ -1,435 +1,435 b''
1 1 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 use crate::utils::hg_path::HgPath;
7 7 use crate::{
8 8 dirstate::{CopyMap, EntryState, StateMap},
9 9 DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
10 10 };
11 11 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
12 12 use micro_timer::timed;
13 13 use std::convert::{TryFrom, TryInto};
14 14 use std::io::Cursor;
15 15 use std::time::Duration;
16 16
17 17 /// Parents are stored in the dirstate as byte hashes.
18 18 pub const PARENT_SIZE: usize = 20;
19 19 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
20 20 const MIN_ENTRY_SIZE: usize = 17;
21 21
22 22 // TODO parse/pack: is mutate-on-loop better for performance?
23 23
24 24 #[timed]
25 25 pub fn parse_dirstate(
26 26 state_map: &mut StateMap,
27 27 copy_map: &mut CopyMap,
28 28 contents: &[u8],
29 29 ) -> Result<DirstateParents, DirstateParseError> {
30 30 if contents.len() < PARENT_SIZE * 2 {
31 31 return Err(DirstateParseError::TooLittleData);
32 32 }
33 33
34 34 let mut curr_pos = PARENT_SIZE * 2;
35 35 let parents = DirstateParents {
36 36 p1: contents[..PARENT_SIZE].try_into().unwrap(),
37 37 p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(),
38 38 };
39 39
40 40 while curr_pos < contents.len() {
41 41 if curr_pos + MIN_ENTRY_SIZE > contents.len() {
42 42 return Err(DirstateParseError::Overflow);
43 43 }
44 44 let entry_bytes = &contents[curr_pos..];
45 45
46 46 let mut cursor = Cursor::new(entry_bytes);
47 47 let state = EntryState::try_from(cursor.read_u8()?)?;
48 48 let mode = cursor.read_i32::<BigEndian>()?;
49 49 let size = cursor.read_i32::<BigEndian>()?;
50 50 let mtime = cursor.read_i32::<BigEndian>()?;
51 51 let path_len = cursor.read_i32::<BigEndian>()? as usize;
52 52
53 53 if path_len > contents.len() - curr_pos {
54 54 return Err(DirstateParseError::Overflow);
55 55 }
56 56
57 57 // Slice instead of allocating a Vec needed for `read_exact`
58 58 let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)];
59 59
60 60 let (path, copy) = match memchr::memchr(0, path) {
61 61 None => (path, None),
62 62 Some(i) => (&path[..i], Some(&path[(i + 1)..])),
63 63 };
64 64
65 65 if let Some(copy_path) = copy {
66 66 copy_map.insert(
67 67 HgPath::new(path).to_owned(),
68 68 HgPath::new(copy_path).to_owned(),
69 69 );
70 70 };
71 71 state_map.insert(
72 72 HgPath::new(path).to_owned(),
73 73 DirstateEntry {
74 74 state,
75 75 mode,
76 76 size,
77 77 mtime,
78 78 },
79 79 );
80 80 curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
81 81 }
82 82
83 83 Ok(parents)
84 84 }
85 85
/// Serialize the dirstate: the two parents followed by one record per entry.
///
/// `now` is the duration in seconds since the Unix epoch. Entries whose
/// mtime equals `now` are written out with `mtime == -1` (and `state_map`
/// is updated to match) so a later `status` re-checks their contents.
pub fn pack_dirstate(
    state_map: &mut StateMap,
    copy_map: &CopyMap,
    parents: DirstateParents,
    now: Duration,
) -> Result<Vec<u8>, DirstatePackError> {
    // TODO move away from i32 before 2038.
    let now: i32 = now.as_secs().try_into().expect("time overflow");

    // Pre-compute the exact output size: allocate once and sanity-check the
    // final buffer length against it below.
    let expected_size: usize = state_map
        .iter()
        .map(|(filename, _)| {
            let mut length = MIN_ENTRY_SIZE + filename.len();
            if let Some(copy) = copy_map.get(filename) {
                // +1 for the NUL separator between filename and copy source
                length += copy.len() + 1;
            }
            length
        })
        .sum();
    let expected_size = expected_size + PARENT_SIZE * 2;

    let mut packed = Vec::with_capacity(expected_size);
    // Deferred mtime invalidations, applied to `state_map` after the loop.
    let mut new_state_map = vec![];

    packed.extend(&parents.p1);
    packed.extend(&parents.p2);

    for (filename, entry) in state_map.iter() {
        let new_filename = filename.to_owned();
        let mut new_mtime: i32 = entry.mtime;
        if entry.state == EntryState::Normal && entry.mtime == now {
            // The file was last modified "simultaneously" with the current
            // write to dirstate (i.e. within the same second for file-
            // systems with a granularity of 1 sec). This commonly happens
            // for at least a couple of files on 'update'.
            // The user could change the file without changing its size
            // within the same second. Invalidate the file's mtime in
            // dirstate, forcing future 'status' calls to compare the
            // contents of the file if the size is the same. This prevents
            // mistakenly treating such files as clean.
            new_mtime = -1;
            new_state_map.push((
                filename.to_owned(),
                DirstateEntry {
                    mtime: new_mtime,
                    ..*entry
                },
            ));
        }
        let mut new_filename = new_filename.into_vec();
        if let Some(copy) = copy_map.get(filename) {
            // Copy source is stored after the filename, NUL-separated.
            new_filename.push(b'\0');
            new_filename.extend(copy.bytes());
        }

        packed.write_u8(entry.state.into())?;
        packed.write_i32::<BigEndian>(entry.mode)?;
        packed.write_i32::<BigEndian>(entry.size)?;
        packed.write_i32::<BigEndian>(new_mtime)?;
        packed.write_i32::<BigEndian>(new_filename.len() as i32)?;
        packed.extend(new_filename)
    }

    if packed.len() != expected_size {
        return Err(DirstatePackError::BadSize(expected_size, packed.len()));
    }

    // Apply the mtime invalidations computed above.
    state_map.extend(new_state_map);

    Ok(packed)
}
158 158
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{utils::hg_path::HgPathBuf, FastHashMap};

    /// Packing an empty map yields only the two parent hashes.
    #[test]
    fn test_pack_dirstate_empty() {
        let mut state_map: StateMap = FastHashMap::default();
        let copymap = FastHashMap::default();
        let parents = DirstateParents {
            p1: *b"12345678910111213141",
            p2: *b"00000000000000000000",
        };
        let now = Duration::new(15000000, 0);
        let expected = b"1234567891011121314100000000000000000000".to_vec();

        assert_eq!(
            expected,
            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
        );

        assert!(state_map.is_empty())
    }

    /// One entry serialized after the parents; mtime != now, so the input
    /// map must be left untouched.
    #[test]
    fn test_pack_dirstate_one_entry() {
        let expected_state_map: StateMap = [(
            HgPathBuf::from_bytes(b"f1"),
            DirstateEntry {
                state: EntryState::Normal,
                mode: 0o644,
                size: 0,
                mtime: 791231220,
            },
        )]
        .iter()
        .cloned()
        .collect();
        let mut state_map = expected_state_map.clone();

        let copymap = FastHashMap::default();
        let parents = DirstateParents {
            p1: *b"12345678910111213141",
            p2: *b"00000000000000000000",
        };
        let now = Duration::new(15000000, 0);
        // Raw bytes: parents, then state/mode/size/mtime/len + "f1".
        let expected = [
            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
            41, 58, 244, 0, 0, 0, 2, 102, 49,
        ]
        .to_vec();

        assert_eq!(
            expected,
            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
        );

        assert_eq!(expected_state_map, state_map);
    }

    /// The copy source is appended to the filename, NUL-separated.
    #[test]
    fn test_pack_dirstate_one_entry_with_copy() {
        let expected_state_map: StateMap = [(
            HgPathBuf::from_bytes(b"f1"),
            DirstateEntry {
                state: EntryState::Normal,
                mode: 0o644,
                size: 0,
                mtime: 791231220,
            },
        )]
        .iter()
        .cloned()
        .collect();
        let mut state_map = expected_state_map.clone();
        let mut copymap = FastHashMap::default();
        copymap.insert(
            HgPathBuf::from_bytes(b"f1"),
            HgPathBuf::from_bytes(b"copyname"),
        );
        let parents = DirstateParents {
            p1: *b"12345678910111213141",
            p2: *b"00000000000000000000",
        };
        let now = Duration::new(15000000, 0);
        // Same as above but path length is 11: "f1\0copyname".
        let expected = [
            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
            41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
            109, 101,
        ]
        .to_vec();

        assert_eq!(
            expected,
            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
        );
        assert_eq!(expected_state_map, state_map);
    }

    /// Round-trip: parsing the packed output must reproduce the input.
    #[test]
    fn test_parse_pack_one_entry_with_copy() {
        let mut state_map: StateMap = [(
            HgPathBuf::from_bytes(b"f1"),
            DirstateEntry {
                state: EntryState::Normal,
                mode: 0o644,
                size: 0,
                mtime: 791231220,
            },
        )]
        .iter()
        .cloned()
        .collect();
        let mut copymap = FastHashMap::default();
        copymap.insert(
            HgPathBuf::from_bytes(b"f1"),
            HgPathBuf::from_bytes(b"copyname"),
        );
        let parents = DirstateParents {
            p1: *b"12345678910111213141",
            p2: *b"00000000000000000000",
        };
        let now = Duration::new(15000000, 0);
        let result =
            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                .unwrap();

        let mut new_state_map: StateMap = FastHashMap::default();
        let mut new_copy_map: CopyMap = FastHashMap::default();
        let new_parents = parse_dirstate(
            &mut new_state_map,
            &mut new_copy_map,
            result.as_slice(),
        )
        .unwrap();
        assert_eq!(
            (parents, state_map, copymap),
            (new_parents, new_state_map, new_copy_map)
        )
    }

    /// Round-trip with several entries, several states and a non-ASCII
    /// filename byte.
    #[test]
    fn test_parse_pack_multiple_entries_with_copy() {
        let mut state_map: StateMap = [
            (
                HgPathBuf::from_bytes(b"f1"),
                DirstateEntry {
                    state: EntryState::Normal,
                    mode: 0o644,
                    size: 0,
                    mtime: 791231220,
                },
            ),
            (
                HgPathBuf::from_bytes(b"f2"),
                DirstateEntry {
                    state: EntryState::Merged,
                    mode: 0o777,
                    size: 1000,
                    mtime: 791231220,
                },
            ),
            (
                HgPathBuf::from_bytes(b"f3"),
                DirstateEntry {
                    state: EntryState::Removed,
                    mode: 0o644,
                    size: 234553,
                    mtime: 791231220,
                },
            ),
            (
                HgPathBuf::from_bytes(b"f4\xF6"),
                DirstateEntry {
                    state: EntryState::Added,
                    mode: 0o644,
                    size: -1,
                    mtime: -1,
                },
            ),
        ]
        .iter()
        .cloned()
        .collect();
        let mut copymap = FastHashMap::default();
        copymap.insert(
            HgPathBuf::from_bytes(b"f1"),
            HgPathBuf::from_bytes(b"copyname"),
        );
        copymap.insert(
            HgPathBuf::from_bytes(b"f4\xF6"),
            HgPathBuf::from_bytes(b"copyname2"),
        );
        let parents = DirstateParents {
            p1: *b"12345678910111213141",
            p2: *b"00000000000000000000",
        };
        let now = Duration::new(15000000, 0);
        let result =
            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                .unwrap();

        let mut new_state_map: StateMap = FastHashMap::default();
        let mut new_copy_map: CopyMap = FastHashMap::default();
        let new_parents = parse_dirstate(
            &mut new_state_map,
            &mut new_copy_map,
            result.as_slice(),
        )
        .unwrap();
        assert_eq!(
            (parents, state_map, copymap),
            (new_parents, new_state_map, new_copy_map)
        )
    }

    /// An entry with `mtime == now` must come back with `mtime == -1`
    /// after pack (mtime invalidation for racy writes).
    #[test]
    /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
    fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
        let mut state_map: StateMap = [(
            HgPathBuf::from_bytes(b"f1"),
            DirstateEntry {
                state: EntryState::Normal,
                mode: 0o644,
                size: 0,
                mtime: 15000000,
            },
        )]
        .iter()
        .cloned()
        .collect();
        let mut copymap = FastHashMap::default();
        copymap.insert(
            HgPathBuf::from_bytes(b"f1"),
            HgPathBuf::from_bytes(b"copyname"),
        );
        let parents = DirstateParents {
            p1: *b"12345678910111213141",
            p2: *b"00000000000000000000",
        };
        let now = Duration::new(15000000, 0);
        let result =
            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                .unwrap();

        let mut new_state_map: StateMap = FastHashMap::default();
        let mut new_copy_map: CopyMap = FastHashMap::default();
        let new_parents = parse_dirstate(
            &mut new_state_map,
            &mut new_copy_map,
            result.as_slice(),
        )
        .unwrap();

        assert_eq!(
            (
                parents,
                [(
                    HgPathBuf::from_bytes(b"f1"),
                    DirstateEntry {
                        state: EntryState::Normal,
                        mode: 0o644,
                        size: 0,
                        mtime: -1
                    }
                )]
                .iter()
                .cloned()
                .collect::<StateMap>(),
                copymap,
            ),
            (new_parents, new_state_map, new_copy_map)
        )
    }
}
@@ -1,956 +1,953 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Rust implementation of dirstate.status (dirstate.py).
9 9 //! It is currently missing a lot of functionality compared to the Python one
10 10 //! and will only be triggered in narrow cases.
11 11
12 12 use crate::{
13 13 dirstate::SIZE_FROM_OTHER_PARENT,
14 14 filepatterns::PatternFileWarning,
15 15 matchers::{get_ignore_function, Matcher, VisitChildrenSet},
16 16 utils::{
17 17 files::{find_dirs, HgMetadata},
18 18 hg_path::{
19 19 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
20 20 HgPathError,
21 21 },
22 22 path_auditor::PathAuditor,
23 23 },
24 24 CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
25 25 PatternError,
26 26 };
27 27 use lazy_static::lazy_static;
28 28 use micro_timer::timed;
29 29 use rayon::prelude::*;
30 30 use std::{
31 31 borrow::Cow,
32 32 collections::HashSet,
33 33 fs::{read_dir, DirEntry},
34 34 io::ErrorKind,
35 35 ops::Deref,
36 36 path::{Path, PathBuf},
37 37 };
38 38
/// Wrong type of file from a `BadMatch`: the path exists but is not a
/// regular file, symlink or traversable directory.
/// Note: a lot of those don't exist on all platforms.
#[derive(Debug, Copy, Clone)]
pub enum BadType {
    CharacterDevice,
    BlockDevice,
    FIFO,
    Socket,
    Directory,
    /// Catch-all used when the platform cannot be more specific.
    Unknown,
}
50 50
51 51 impl ToString for BadType {
52 52 fn to_string(&self) -> String {
53 53 match self {
54 54 BadType::CharacterDevice => "character device",
55 55 BadType::BlockDevice => "block device",
56 56 BadType::FIFO => "fifo",
57 57 BadType::Socket => "socket",
58 58 BadType::Directory => "directory",
59 59 BadType::Unknown => "unknown",
60 60 }
61 61 .to_string()
62 62 }
63 63 }
64 64
/// Was explicitly matched but cannot be found/accessed
#[derive(Debug, Copy, Clone)]
pub enum BadMatch {
    /// Underlying OS error, carried as the raw `errno` value.
    OsError(i32),
    /// The path exists but has an unsupported file type.
    BadType(BadType),
}
71 71
/// Marker enum used to dispatch new status entries into the right collections.
/// Is similar to `crate::EntryState`, but represents the transient state of
/// entries during the lifetime of a command.
#[derive(Debug, Copy, Clone)]
enum Dispatch {
    /// Metadata alone is inconclusive; a content lookup is forced later.
    Unsure,
    Modified,
    Added,
    Removed,
    /// Tracked in the dirstate but missing from the filesystem.
    Deleted,
    Clean,
    Unknown,
    Ignored,
    /// Empty dispatch, the file is not worth listing
    None,
    /// Was explicitly matched but cannot be found/accessed
    Bad(BadMatch),
    Directory {
        /// True if the directory used to be a file in the dmap so we can say
        /// that it's been removed.
        was_file: bool,
    },
}
95 95
96 96 type IoResult<T> = std::io::Result<T>;
97 97 /// `Box<dyn Trait>` is syntactic sugar for `Box<dyn Trait, 'static>`, so add
98 98 /// an explicit lifetime here to not fight `'static` bounds "out of nowhere".
99 99 type IgnoreFnType<'a> = Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>;
100 100
/// Dates and times that are outside the 31-bit signed range are compared
/// modulo 2^31. This should prevent hg from behaving badly with very large
/// files or corrupt dates while still having a high probability of detecting
/// changes. (issue2608)
/// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
/// is not defined for `i32`, and there is no `As` trait. This forces the
/// caller to cast `b` as `i32`.
fn mod_compare(a: i32, b: i32) -> bool {
    // `x & MAX` masks off the sign bit, so two values differ modulo 2^31
    // exactly when their XOR has a low bit set.
    (a ^ b) & i32::max_value() != 0
}
111 111
112 112 /// Return a sorted list containing information about the entries
113 113 /// in the directory.
114 114 ///
115 115 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
116 116 fn list_directory(
117 117 path: impl AsRef<Path>,
118 118 skip_dot_hg: bool,
119 119 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
120 120 let mut results = vec![];
121 121 let entries = read_dir(path.as_ref())?;
122 122
123 123 for entry in entries {
124 124 let entry = entry?;
125 125 let filename = os_string_to_hg_path_buf(entry.file_name())?;
126 126 let file_type = entry.file_type()?;
127 127 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
128 128 return Ok(vec![]);
129 129 } else {
130 results.push((HgPathBuf::from(filename), entry))
130 results.push((filename, entry))
131 131 }
132 132 }
133 133
134 134 results.sort_unstable_by_key(|e| e.0.clone());
135 135 Ok(results)
136 136 }
137 137
/// The file corresponding to the dirstate entry was found on the filesystem.
/// Compares the on-disk metadata against the recorded entry and decides how
/// the file should be reported.
fn dispatch_found(
    filename: impl AsRef<HgPath>,
    entry: DirstateEntry,
    metadata: HgMetadata,
    copy_map: &CopyMap,
    options: StatusOptions,
) -> Dispatch {
    let DirstateEntry {
        state,
        mode,
        mtime,
        size,
    } = entry;

    let HgMetadata {
        st_mode,
        st_size,
        st_mtime,
        ..
    } = metadata;

    match state {
        EntryState::Normal => {
            let size_changed = mod_compare(size, st_size as i32);
            // Only the executable bit (0o100) matters, and only on
            // filesystems that support exec flags.
            let mode_changed =
                (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
            // A negative recorded size is a special marker, so only trust
            // size/mode comparisons when `size >= 0`.
            let metadata_changed = size >= 0 && (size_changed || mode_changed);
            let other_parent = size == SIZE_FROM_OTHER_PARENT;

            if metadata_changed
                || other_parent
                || copy_map.contains_key(filename.as_ref())
            {
                Dispatch::Modified
            } else if mod_compare(mtime, st_mtime as i32)
                || st_mtime == options.last_normal_time
            {
                // the file may have just been marked as normal and
                // it may have changed in the same second without
                // changing its size. This can happen if we quickly
                // do multiple commits. Force lookup, so we don't
                // miss such a racy file change.
                Dispatch::Unsure
            } else if options.list_clean {
                Dispatch::Clean
            } else {
                Dispatch::None
            }
        }
        EntryState::Merged => Dispatch::Modified,
        EntryState::Added => Dispatch::Added,
        EntryState::Removed => Dispatch::Removed,
        EntryState::Unknown => Dispatch::Unknown,
    }
}
193 194
/// The file corresponding to this Dirstate entry is missing.
/// Translates the recorded state into the report for a tracked-but-absent
/// file.
fn dispatch_missing(state: EntryState) -> Dispatch {
    match state {
        // File was removed from the filesystem during commands
        EntryState::Normal | EntryState::Merged | EntryState::Added => {
            Dispatch::Deleted
        }
        // File was removed, everything is normal
        EntryState::Removed => Dispatch::Removed,
        // File is unknown to Mercurial, everything is normal
        EntryState::Unknown => Dispatch::Unknown,
    }
}
207 208
lazy_static! {
    /// Default set of files to examine when none are explicitly requested:
    /// the empty path, i.e. the repository root.
    static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
        let mut h = HashSet::new();
        h.insert(HgPath::new(b""));
        h
    };
}
215 216
/// Get stat data about the files explicitly specified by match.
/// Each file is stat'ed in parallel and dispatched according to whether it
/// is found, what type it is, and whether the dirstate knows about it.
/// TODO subrepos
#[timed]
fn walk_explicit<'a>(
    files: Option<&'a HashSet<&HgPath>>,
    dmap: &'a DirstateMap,
    root_dir: impl AsRef<Path> + Sync + Send + 'a,
    options: StatusOptions,
    traversed_sender: crossbeam::Sender<HgPathBuf>,
) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
    files
        .unwrap_or(&DEFAULT_WORK)
        .par_iter()
        .map(move |&filename| {
            // TODO normalization
            let normalized = filename;

            let buf = match hg_path_to_path_buf(normalized) {
                Ok(x) => x,
                Err(e) => return Some(Err(e.into())),
            };
            let target = root_dir.as_ref().join(buf);
            // `symlink_metadata` so symlinks themselves are examined, not
            // their targets.
            let st = target.symlink_metadata();
            let in_dmap = dmap.get(normalized);
            match st {
                Ok(meta) => {
                    let file_type = meta.file_type();
                    return if file_type.is_file() || file_type.is_symlink() {
                        if let Some(entry) = in_dmap {
                            // Known to the dirstate: compare metadata.
                            return Some(Ok((
                                normalized,
                                dispatch_found(
                                    &normalized,
                                    *entry,
                                    HgMetadata::from_metadata(meta),
                                    &dmap.copy_map,
                                    options,
                                ),
                            )));
                        }
                        Some(Ok((normalized, Dispatch::Unknown)))
                    } else if file_type.is_dir() {
                        if options.collect_traversed_dirs {
                            traversed_sender
                                .send(normalized.to_owned())
                                .expect("receiver should outlive sender");
                        }
                        Some(Ok((
                            normalized,
                            Dispatch::Directory {
                                was_file: in_dmap.is_some(),
                            },
                        )))
                    } else {
                        Some(Ok((
                            normalized,
                            Dispatch::Bad(BadMatch::BadType(
                                // TODO do more than unknown
                                // Support for all `BadType` variant
                                // varies greatly between platforms.
                                // So far, no tests check the type and
                                // this should be good enough for most
                                // users.
                                BadType::Unknown,
                            )),
                        )))
                    };
                }
                Err(_) => {
                    // stat failed: report missing files the dirstate knows
                    // about, silently skip the rest.
                    if let Some(entry) = in_dmap {
                        return Some(Ok((
                            normalized,
                            dispatch_missing(entry.state),
                        )));
                    }
                }
            };
            None
        })
        .flatten()
}
299 298
/// Options controlling what a status run reports and how.
#[derive(Debug, Copy, Clone)]
pub struct StatusOptions {
    /// Remember the most recent modification timeslot for status, to make
    /// sure we won't miss future size-preserving file content modifications
    /// that happen within the same timeslot.
    pub last_normal_time: i64,
    /// Whether we are on a filesystem with UNIX-like exec flags
    pub check_exec: bool,
    /// Whether to report clean files.
    pub list_clean: bool,
    /// Whether to report files unknown to the dirstate.
    pub list_unknown: bool,
    /// Whether to report ignored files.
    pub list_ignored: bool,
    /// Whether to collect traversed dirs for applying a callback later.
    /// Used by `hg purge` for example.
    pub collect_traversed_dirs: bool,
}
315 314
/// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
/// If the entry is a folder that needs to be traversed, it will be handled
/// in a separate thread.
fn handle_traversed_entry<'a>(
    scope: &rayon::Scope<'a>,
    files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
    matcher: &'a (impl Matcher + Sync),
    root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
    dmap: &'a DirstateMap,
    old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
    ignore_fn: &'a IgnoreFnType,
    dir_ignore_fn: &'a IgnoreFnType,
    options: StatusOptions,
    filename: HgPathBuf,
    dir_entry: DirEntry,
    traversed_sender: crossbeam::Sender<HgPathBuf>,
) -> IoResult<()> {
    let file_type = dir_entry.file_type()?;
    let entry_option = dmap.get(&filename);

    if filename.as_bytes() == b".hg" {
        // Could be a directory or a symlink
        return Ok(());
    }

    if file_type.is_dir() {
        // Directories are traversed (possibly recursively) on a worker
        // thread; results flow back through `files_sender`.
        handle_traversed_dir(
            scope,
            files_sender,
            matcher,
            root_dir,
            dmap,
            old_results,
            ignore_fn,
            dir_ignore_fn,
            options,
            entry_option,
            filename,
            traversed_sender,
        );
    } else if file_type.is_file() || file_type.is_symlink() {
        if let Some(entry) = entry_option {
            // Tracked file: compare on-disk metadata with the dirstate.
            if matcher.matches_everything() || matcher.matches(&filename) {
                let metadata = dir_entry.metadata()?;
                files_sender
                    .send(Ok((
                        filename.to_owned(),
                        dispatch_found(
                            &filename,
                            *entry,
                            HgMetadata::from_metadata(metadata),
                            &dmap.copy_map,
                            options,
                        ),
                    )))
                    .unwrap();
            }
        } else if (matcher.matches_everything() || matcher.matches(&filename))
            && !ignore_fn(&filename)
        {
            // Untracked, matched and not ignored by its own patterns; it
            // may still live under an ignored directory.
            if (options.list_ignored || matcher.exact_match(&filename))
                && dir_ignore_fn(&filename)
            {
                if options.list_ignored {
                    files_sender
                        .send(Ok((filename.to_owned(), Dispatch::Ignored)))
                        .unwrap();
                }
            } else if options.list_unknown {
                files_sender
                    .send(Ok((filename.to_owned(), Dispatch::Unknown)))
                    .unwrap();
            }
        } else if ignore_fn(&filename) && options.list_ignored {
            files_sender
                .send(Ok((filename.to_owned(), Dispatch::Ignored)))
                .unwrap();
        }
    } else if let Some(entry) = entry_option {
        // Used to be a file or a folder, now something else.
        if matcher.matches_everything() || matcher.matches(&filename) {
            files_sender
                .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
                .unwrap();
        }
    }

    Ok(())
}
407 404
/// A directory was found in the filesystem and needs to be traversed
/// (spawned on the rayon scope so traversal happens concurrently).
fn handle_traversed_dir<'a>(
    scope: &rayon::Scope<'a>,
    files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
    matcher: &'a (impl Matcher + Sync),
    root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
    dmap: &'a DirstateMap,
    old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
    ignore_fn: &'a IgnoreFnType,
    dir_ignore_fn: &'a IgnoreFnType,
    options: StatusOptions,
    entry_option: Option<&'a DirstateEntry>,
    directory: HgPathBuf,
    traversed_sender: crossbeam::Sender<HgPathBuf>,
) {
    scope.spawn(move |_| {
        // Nested `if` until `rust-lang/rust#53668` is stable
        if let Some(entry) = entry_option {
            // Used to be a file, is now a folder
            if matcher.matches_everything() || matcher.matches(&directory) {
                files_sender
                    .send(Ok((
                        directory.to_owned(),
                        dispatch_missing(entry.state),
                    )))
                    .unwrap();
            }
        }
        // Do we need to traverse it?
        if !ignore_fn(&directory) || options.list_ignored {
            traverse_dir(
                files_sender,
                matcher,
                root_dir,
                dmap,
                directory,
                &old_results,
                ignore_fn,
                dir_ignore_fn,
                options,
                traversed_sender,
            )
            // I/O errors are forwarded through the channel instead of
            // aborting the scope.
            .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
        }
    });
}
454 451
/// Decides whether the directory needs to be listed, and if so handles the
/// entries in a separate thread.
fn traverse_dir<'a>(
    files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
    matcher: &'a (impl Matcher + Sync),
    root_dir: impl AsRef<Path> + Sync + Send + Copy,
    dmap: &'a DirstateMap,
    directory: impl AsRef<HgPath>,
    old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
    ignore_fn: &IgnoreFnType,
    dir_ignore_fn: &IgnoreFnType,
    options: StatusOptions,
    traversed_sender: crossbeam::Sender<HgPathBuf>,
) -> IoResult<()> {
    let directory = directory.as_ref();

    if options.collect_traversed_dirs {
        traversed_sender
            .send(directory.to_owned())
            .expect("receiver should outlive sender");
    }

    // Ask the matcher which children can possibly match; `Empty` lets us
    // skip the whole directory without touching the filesystem.
    let visit_entries = match matcher.visit_children_set(directory) {
        VisitChildrenSet::Empty => return Ok(()),
        VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
        VisitChildrenSet::Set(set) => Some(set),
    };
    let buf = hg_path_to_path_buf(directory)?;
    let dir_path = root_dir.as_ref().join(buf);

    // Only skip `.hg` below the root; the root's own `.hg` is the repo.
    let skip_dot_hg = !directory.as_bytes().is_empty();
    let entries = match list_directory(dir_path, skip_dot_hg) {
        Err(e) => match e.kind() {
            ErrorKind::NotFound | ErrorKind::PermissionDenied => {
                // Report the directory itself as a bad match instead of
                // failing the whole status run.
                files_sender
                    .send(Ok((
                        directory.to_owned(),
                        Dispatch::Bad(BadMatch::OsError(
                            // Unwrapping here is OK because the error always
                            // is a real os error
                            e.raw_os_error().unwrap(),
                        )),
                    )))
                    .unwrap();
                return Ok(());
            }
            _ => return Err(e),
        },
        Ok(entries) => entries,
    };

    rayon::scope(|scope| -> IoResult<()> {
        for (filename, dir_entry) in entries {
            if let Some(ref set) = visit_entries {
                if !set.contains(filename.deref()) {
                    continue;
                }
            }
            // TODO normalize
            let filename = if directory.is_empty() {
                filename.to_owned()
            } else {
                directory.join(&filename)
            };

            // Entries already dispatched in a previous pass are skipped.
            if !old_results.contains_key(filename.deref()) {
                handle_traversed_entry(
                    scope,
                    files_sender,
                    matcher,
                    root_dir,
                    dmap,
                    old_results,
                    ignore_fn,
                    dir_ignore_fn,
                    options,
                    filename,
                    dir_entry,
                    traversed_sender.clone(),
                )?;
            }
        }
        Ok(())
    })
}
540 537
/// Walk the working directory recursively to look for changes compared to the
/// current `DirstateMap`.
///
/// This takes a mutable reference to the results to account for the `extend`
/// in timings
#[timed]
fn traverse<'a>(
    matcher: &'a (impl Matcher + Sync),
    root_dir: impl AsRef<Path> + Sync + Send + Copy,
    dmap: &'a DirstateMap,
    path: impl AsRef<HgPath>,
    old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
    ignore_fn: &IgnoreFnType,
    dir_ignore_fn: &IgnoreFnType,
    options: StatusOptions,
    results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
    traversed_sender: crossbeam::Sender<HgPathBuf>,
) -> IoResult<()> {
    let root_dir = root_dir.as_ref();

    // The traversal is done in parallel, so use a channel to gather entries.
    // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
    let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();

    traverse_dir(
        &files_transmitter,
        matcher,
        root_dir,
        &dmap,
        path,
        &old_results,
        &ignore_fn,
        &dir_ignore_fn,
        options,
        traversed_sender,
    )?;

    // Disconnect the channel so the receiver stops waiting
    drop(files_transmitter);

    // TODO don't collect. Find a way of replicating the behavior of
    // `itertools::process_results`, but for `rayon::ParallelIterator`
    let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
        files_receiver
            .into_iter()
            .map(|item| {
                // Propagate the first I/O error, otherwise take ownership
                // of each dispatched path.
                let (f, d) = item?;
                Ok((Cow::Owned(f), d))
            })
            .collect();

    results.par_extend(new_results?);

    Ok(())
}
596 593
/// Stat all entries in the `DirstateMap` and mark them for dispatch.
///
/// Returns a parallel iterator of `(filename, dispatch)` pairs; per-entry
/// I/O errors are yielded as `Err` items instead of aborting the whole run.
fn stat_dmap_entries(
    dmap: &DirstateMap,
    root_dir: impl AsRef<Path> + Sync + Send,
    options: StatusOptions,
) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
    dmap.par_iter().map(move |(filename, entry)| {
        let filename: &HgPath = filename;
        let filename_as_path = hg_path_to_path_buf(filename)?;
        // `symlink_metadata` (not `metadata`) so that symlinks themselves
        // are examined instead of their targets.
        let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();

        match meta {
            // NOTE: this guard arm must stay before the plain `Ok(m)` arm:
            // anything that is neither a regular file nor a symlink
            // (directory, fifo, …) is treated as missing.
            Ok(ref m)
                if !(m.file_type().is_file()
                    || m.file_type().is_symlink()) =>
            {
                Ok((filename, dispatch_missing(entry.state)))
            }
            Ok(m) => Ok((
                filename,
                dispatch_found(
                    filename,
                    *entry,
                    HgMetadata::from_metadata(m),
                    &dmap.copy_map,
                    options,
                ),
            )),
            Err(ref e)
                if e.kind() == ErrorKind::NotFound
                    || e.raw_os_error() == Some(20) =>
            {
                // Rust does not yet have an `ErrorKind` for
                // `NotADirectory` (errno 20)
                // It happens if the dirstate contains `foo/bar` and
                // foo is not a directory
                Ok((filename, dispatch_missing(entry.state)))
            }
            Err(e) => Err(e),
        }
    })
}
639 636
640 637 /// This takes a mutable reference to the results to account for the `extend`
641 638 /// in timings
642 639 #[timed]
643 640 fn extend_from_dmap<'a>(
644 641 dmap: &'a DirstateMap,
645 642 root_dir: impl AsRef<Path> + Sync + Send,
646 643 options: StatusOptions,
647 644 results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
648 645 ) {
649 646 results.par_extend(
650 647 stat_dmap_entries(dmap, root_dir, options)
651 648 .flatten()
652 649 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
653 650 );
654 651 }
655 652
/// Outcome of a status run: one bucket per relevant `Dispatch` variant,
/// filled by `build_response`.
#[derive(Debug)]
pub struct DirstateStatus<'a> {
    /// Paths dispatched as `Dispatch::Modified`
    pub modified: Vec<Cow<'a, HgPath>>,
    /// Paths dispatched as `Dispatch::Added`
    pub added: Vec<Cow<'a, HgPath>>,
    /// Paths dispatched as `Dispatch::Removed`
    pub removed: Vec<Cow<'a, HgPath>>,
    /// Paths dispatched as `Dispatch::Deleted`
    pub deleted: Vec<Cow<'a, HgPath>>,
    /// Paths dispatched as `Dispatch::Clean`
    pub clean: Vec<Cow<'a, HgPath>>,
    /// Paths dispatched as `Dispatch::Ignored`
    pub ignored: Vec<Cow<'a, HgPath>>,
    /// Paths dispatched as `Dispatch::Unknown`
    pub unknown: Vec<Cow<'a, HgPath>>,
    /// Paths with their `BadMatch` reason (`Dispatch::Bad`)
    pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
    /// Only filled if `collect_traversed_dirs` is `true`
    pub traversed: Vec<HgPathBuf>,
}
669 666
670 667 #[timed]
671 668 fn build_response<'a>(
672 669 results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
673 670 traversed: Vec<HgPathBuf>,
674 671 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
675 672 let mut lookup = vec![];
676 673 let mut modified = vec![];
677 674 let mut added = vec![];
678 675 let mut removed = vec![];
679 676 let mut deleted = vec![];
680 677 let mut clean = vec![];
681 678 let mut ignored = vec![];
682 679 let mut unknown = vec![];
683 680 let mut bad = vec![];
684 681
685 682 for (filename, dispatch) in results.into_iter() {
686 683 match dispatch {
687 684 Dispatch::Unknown => unknown.push(filename),
688 685 Dispatch::Unsure => lookup.push(filename),
689 686 Dispatch::Modified => modified.push(filename),
690 687 Dispatch::Added => added.push(filename),
691 688 Dispatch::Removed => removed.push(filename),
692 689 Dispatch::Deleted => deleted.push(filename),
693 690 Dispatch::Clean => clean.push(filename),
694 691 Dispatch::Ignored => ignored.push(filename),
695 692 Dispatch::None => {}
696 693 Dispatch::Bad(reason) => bad.push((filename, reason)),
697 694 Dispatch::Directory { .. } => {}
698 695 }
699 696 }
700 697
701 698 (
702 699 lookup,
703 700 DirstateStatus {
704 701 modified,
705 702 added,
706 703 removed,
707 704 deleted,
708 705 clean,
709 706 ignored,
710 707 unknown,
711 708 bad,
712 709 traversed,
713 710 },
714 711 )
715 712 }
716 713
/// Errors that can occur during a status run.
#[derive(Debug)]
pub enum StatusError {
    /// Wraps a filesystem-level error
    IO(std::io::Error),
    /// Wraps an `HgPath` conversion/validation error
    Path(HgPathError),
    /// Wraps an ignore/match pattern error
    Pattern(PatternError),
}

/// Convenience alias for status-returning functions.
pub type StatusResult<T> = Result<T, StatusError>;

// Conversions used by the `?` operator throughout the status code paths.
impl From<PatternError> for StatusError {
    fn from(e: PatternError) -> Self {
        StatusError::Pattern(e)
    }
}
impl From<HgPathError> for StatusError {
    fn from(e: HgPathError) -> Self {
        StatusError::Path(e)
    }
}
impl From<std::io::Error> for StatusError {
    fn from(e: std::io::Error) -> Self {
        StatusError::IO(e)
    }
}
741 738
742 739 impl ToString for StatusError {
743 740 fn to_string(&self) -> String {
744 741 match self {
745 742 StatusError::IO(e) => e.to_string(),
746 743 StatusError::Path(e) => e.to_string(),
747 744 StatusError::Pattern(e) => e.to_string(),
748 745 }
749 746 }
750 747 }
751 748
/// Dispatch the dirstate entries that were not handled by the traversal:
/// each one is either ignored, missing, or hidden under a symlink directory.
///
/// This takes a mutable reference to the results to account for the `extend`
/// in timings
#[timed]
fn handle_unknowns<'a>(
    dmap: &'a DirstateMap,
    matcher: &(impl Matcher + Sync),
    root_dir: impl AsRef<Path> + Sync + Send + Copy,
    options: StatusOptions,
    results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
) -> IoResult<()> {
    // Fast path: with no prior results and a match-all matcher, every
    // dirstate entry still needs a visit.
    let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
        && matcher.matches_everything()
    {
        dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
    } else {
        // Only convert to a hashmap if needed.
        let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
        dmap.iter()
            .filter_map(move |(f, e)| {
                if !old_results.contains_key(f.deref()) && matcher.matches(f) {
                    Some((f.deref(), e))
                } else {
                    None
                }
            })
            .collect()
    };

    // We walked all dirs under the roots that weren't ignored, and
    // everything that matched was stat'ed and is already in results.
    // The rest must thus be ignored or under a symlink.
    let path_auditor = PathAuditor::new(root_dir);

    // TODO don't collect. Find a way of replicating the behavior of
    // `itertools::process_results`, but for `rayon::ParallelIterator`
    let new_results: IoResult<Vec<_>> = to_visit
        .into_par_iter()
        .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
            // Report ignored items in the dmap as long as they are not
            // under a symlink directory.
            if path_auditor.check(filename) {
                // TODO normalize for case-insensitive filesystems
                let buf = match hg_path_to_path_buf(filename) {
                    Ok(x) => x,
                    Err(e) => return Some(Err(e.into())),
                };
                Some(Ok((
                    Cow::Borrowed(filename),
                    match root_dir.as_ref().join(&buf).symlink_metadata() {
                        // File was just ignored, no links, and exists
                        Ok(meta) => {
                            let metadata = HgMetadata::from_metadata(meta);
                            dispatch_found(
                                filename,
                                *entry,
                                metadata,
                                &dmap.copy_map,
                                options,
                            )
                        }
                        // File doesn't exist
                        Err(_) => dispatch_missing(entry.state),
                    },
                )))
            } else {
                // It's either missing or under a symlink directory which
                // we, in this case, report as missing.
                Some(Ok((
                    Cow::Borrowed(filename),
                    dispatch_missing(entry.state),
                )))
            }
        })
        .collect();

    results.par_extend(new_results?);

    Ok(())
}
831 828
832 829 /// Get the status of files in the working directory.
833 830 ///
834 831 /// This is the current entry-point for `hg-core` and is realistically unusable
835 832 /// outside of a Python context because its arguments need to provide a lot of
836 833 /// information that will not be necessary in the future.
837 834 #[timed]
838 835 pub fn status<'a: 'c, 'b: 'c, 'c>(
839 836 dmap: &'a DirstateMap,
840 837 matcher: &'b (impl Matcher + Sync),
841 838 root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
842 839 ignore_files: Vec<PathBuf>,
843 840 options: StatusOptions,
844 841 ) -> StatusResult<(
845 842 (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
846 843 Vec<PatternFileWarning>,
847 844 )> {
848 845 // Needs to outlive `dir_ignore_fn` since it's captured.
849 846 let ignore_fn: IgnoreFnType;
850 847
851 848 // Only involve real ignore mechanism if we're listing unknowns or ignored.
852 849 let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
853 850 || options.list_unknown
854 851 {
855 852 let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
856 853
857 854 ignore_fn = ignore;
858 855 let dir_ignore_fn = Box::new(|dir: &_| {
859 856 // Is the path or one of its ancestors ignored?
860 857 if ignore_fn(dir) {
861 858 true
862 859 } else {
863 860 for p in find_dirs(dir) {
864 861 if ignore_fn(p) {
865 862 return true;
866 863 }
867 864 }
868 865 false
869 866 }
870 867 });
871 868 (dir_ignore_fn, warnings)
872 869 } else {
873 870 ignore_fn = Box::new(|&_| true);
874 871 (Box::new(|&_| true), vec![])
875 872 };
876 873
877 874 let files = matcher.file_set();
878 875
879 876 // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
880 877 let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
881 878
882 879 // Step 1: check the files explicitly mentioned by the user
883 880 let explicit = walk_explicit(
884 881 files,
885 882 &dmap,
886 883 root_dir,
887 884 options,
888 885 traversed_sender.clone(),
889 886 );
890 887
891 888 // Collect results into a `Vec` because we do very few lookups in most
892 889 // cases.
893 890 let (work, mut results): (Vec<_>, Vec<_>) = explicit
894 891 .filter_map(Result::ok)
895 892 .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
896 893 .partition(|(_, dispatch)| match dispatch {
897 894 Dispatch::Directory { .. } => true,
898 895 _ => false,
899 896 });
900 897
901 898 if !work.is_empty() {
902 899 // Hashmaps are quite a bit slower to build than vecs, so only build it
903 900 // if needed.
904 901 let old_results = results.iter().cloned().collect();
905 902
906 903 // Step 2: recursively check the working directory for changes if
907 904 // needed
908 905 for (dir, dispatch) in work {
909 906 match dispatch {
910 907 Dispatch::Directory { was_file } => {
911 908 if was_file {
912 909 results.push((dir.to_owned(), Dispatch::Removed));
913 910 }
914 911 if options.list_ignored
915 912 || options.list_unknown && !dir_ignore_fn(&dir)
916 913 {
917 914 traverse(
918 915 matcher,
919 916 root_dir,
920 917 &dmap,
921 918 &dir,
922 919 &old_results,
923 920 &ignore_fn,
924 921 &dir_ignore_fn,
925 922 options,
926 923 &mut results,
927 924 traversed_sender.clone(),
928 925 )?;
929 926 }
930 927 }
931 928 _ => unreachable!("There can only be directories in `work`"),
932 929 }
933 930 }
934 931 }
935 932
936 933 if !matcher.is_exact() {
937 934 // Step 3: Check the remaining files from the dmap.
938 935 // If a dmap file is not in results yet, it was either
939 936 // a) not matched b) ignored, c) missing, or d) under a
940 937 // symlink directory.
941 938
942 939 if options.list_unknown {
943 940 handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
944 941 } else {
945 942 // We may not have walked the full directory tree above, so stat
946 943 // and check everything we missed.
947 944 extend_from_dmap(&dmap, root_dir, options, &mut results);
948 945 }
949 946 }
950 947
951 948 // Close the channel
952 949 drop(traversed_sender);
953 950 let traversed_dirs = traversed_recv.into_iter().collect();
954 951
955 952 Ok((build_response(results, traversed_dirs), warnings))
956 953 }
@@ -1,695 +1,695 b''
1 1 // discovery.rs
2 2 //
3 3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Discovery operations
9 9 //!
10 10 //! This is a Rust counterpart to the `partialdiscovery` class of
11 11 //! `mercurial.setdiscovery`
12 12
13 13 use super::{Graph, GraphError, Revision, NULL_REVISION};
14 14 use crate::{ancestors::MissingAncestors, dagops, FastHashMap};
15 15 use rand::seq::SliceRandom;
16 16 use rand::{thread_rng, RngCore, SeedableRng};
17 17 use std::cmp::{max, min};
18 18 use std::collections::{HashSet, VecDeque};
19 19
/// Pseudo-random generator used for sampling (seeded, hence reproducible).
type Rng = rand_pcg::Pcg32;
/// Raw seed material for `Rng`.
type Seed = [u8; 16];

/// Rust counterpart to the `partialdiscovery` class of
/// `mercurial.setdiscovery` (see module docs).
pub struct PartialDiscovery<G: Graph + Clone> {
    // Heads of the revset to compare; consumed by `ensure_undecided()`.
    target_heads: Option<Vec<Revision>>,
    graph: G, // plays the role of self._repo
    // Revisions registered as common with the peer (kept as bases).
    common: MissingAncestors<G>,
    // Lazily computed: revisions not yet known to be common or missing.
    undecided: Option<HashSet<Revision>>,
    // Lazily computed reverse DAG (parent -> children) over `undecided`.
    children_cache: Option<FastHashMap<Revision, Vec<Revision>>>,
    // Revisions registered as missing from the peer.
    missing: HashSet<Revision>,
    rng: Rng,
    respect_size: bool,
    randomize: bool,
}

/// Statistics reported by `PartialDiscovery::stats()`.
pub struct DiscoveryStats {
    /// Size of the undecided set, if it has been computed yet.
    pub undecided: Option<usize>,
}
38 38
39 39 /// Update an existing sample to match the expected size
40 40 ///
41 41 /// The sample is updated with revisions exponentially distant from each
42 42 /// element of `heads`.
43 43 ///
44 44 /// If a target size is specified, the sampling will stop once this size is
45 45 /// reached. Otherwise sampling will happen until roots of the <revs> set are
46 46 /// reached.
47 47 ///
48 48 /// - `revs`: set of revs we want to discover (if None, `assume` the whole dag
49 49 /// represented by `parentfn`
50 50 /// - `heads`: set of DAG head revs
51 51 /// - `sample`: a sample to update
52 52 /// - `parentfn`: a callable to resolve parents for a revision
53 53 /// - `quicksamplesize`: optional target size of the sample
54 54 fn update_sample<I>(
55 55 revs: Option<&HashSet<Revision>>,
56 56 heads: impl IntoIterator<Item = Revision>,
57 57 sample: &mut HashSet<Revision>,
58 58 parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
59 59 quicksamplesize: Option<usize>,
60 60 ) -> Result<(), GraphError>
61 61 where
62 62 I: Iterator<Item = Revision>,
63 63 {
64 64 let mut distances: FastHashMap<Revision, u32> = FastHashMap::default();
65 65 let mut visit: VecDeque<Revision> = heads.into_iter().collect();
66 66 let mut factor: u32 = 1;
67 67 let mut seen: HashSet<Revision> = HashSet::new();
68 68 while let Some(current) = visit.pop_front() {
69 69 if !seen.insert(current) {
70 70 continue;
71 71 }
72 72
73 73 let d = *distances.entry(current).or_insert(1);
74 74 if d > factor {
75 75 factor *= 2;
76 76 }
77 77 if d == factor {
78 78 sample.insert(current);
79 79 if let Some(sz) = quicksamplesize {
80 80 if sample.len() >= sz {
81 81 return Ok(());
82 82 }
83 83 }
84 84 }
85 85 for p in parentsfn(current)? {
86 86 if let Some(revs) = revs {
87 87 if !revs.contains(&p) {
88 88 continue;
89 89 }
90 90 }
91 91 distances.entry(p).or_insert(d + 1);
92 92 visit.push_back(p);
93 93 }
94 94 }
95 95 Ok(())
96 96 }
97 97
98 98 struct ParentsIterator {
99 99 parents: [Revision; 2],
100 100 cur: usize,
101 101 }
102 102
103 103 impl ParentsIterator {
104 104 fn graph_parents(
105 105 graph: &impl Graph,
106 106 r: Revision,
107 107 ) -> Result<ParentsIterator, GraphError> {
108 108 Ok(ParentsIterator {
109 109 parents: graph.parents(r)?,
110 110 cur: 0,
111 111 })
112 112 }
113 113 }
114 114
115 115 impl Iterator for ParentsIterator {
116 116 type Item = Revision;
117 117
118 118 fn next(&mut self) -> Option<Revision> {
119 119 if self.cur > 1 {
120 120 return None;
121 121 }
122 122 let rev = self.parents[self.cur];
123 123 self.cur += 1;
124 124 if rev == NULL_REVISION {
125 125 return self.next();
126 126 }
127 127 Some(rev)
128 128 }
129 129 }
130 130
impl<G: Graph + Clone> PartialDiscovery<G> {
    /// Create a PartialDiscovery object, with the intent
    /// of comparing our `::<target_heads>` revset to the contents of another
    /// repo.
    ///
    /// For now `target_heads` is passed as a vector, and will be used
    /// at the first call to `ensure_undecided()`.
    ///
    /// If we want to make the signature more flexible,
    /// we'll have to make it a type argument of `PartialDiscovery` or a trait
    /// object since we'll keep it in the meanwhile
    ///
    /// The `respect_size` boolean controls how the sampling methods
    /// will interpret the size argument requested by the caller. If it's
    /// `false`, they are allowed to produce a sample whose size is more
    /// appropriate to the situation (typically bigger).
    ///
    /// The `randomize` boolean affects sampling, and specifically how
    /// limiting or last-minute expanding is been done:
    ///
    /// If `true`, both will perform random picking from `self.undecided`.
    /// This is currently the best for actual discoveries.
    ///
    /// If `false`, a reproductible picking strategy is performed. This is
    /// useful for integration tests.
    pub fn new(
        graph: G,
        target_heads: Vec<Revision>,
        respect_size: bool,
        randomize: bool,
    ) -> Self {
        // Only draw entropy when randomization is requested; the all-zeros
        // seed keeps non-randomized runs reproducible.
        let seed = if randomize {
            let mut entropy: Seed = [0; 16];
            thread_rng().fill_bytes(&mut entropy);
            entropy
        } else {
            [0; 16]
        };
        Self::new_with_seed(graph, target_heads, seed, respect_size, randomize)
    }
168 168
169 169 pub fn new_with_seed(
170 170 graph: G,
171 171 target_heads: Vec<Revision>,
172 172 seed: Seed,
173 173 respect_size: bool,
174 174 randomize: bool,
175 175 ) -> Self {
176 176 PartialDiscovery {
177 177 undecided: None,
178 178 children_cache: None,
179 179 target_heads: Some(target_heads),
180 180 graph: graph.clone(),
181 181 common: MissingAncestors::new(graph, vec![]),
182 182 missing: HashSet::new(),
183 183 rng: Rng::from_seed(seed),
184 respect_size: respect_size,
185 randomize: randomize,
184 respect_size,
185 randomize,
186 186 }
187 187 }
188 188
189 189 /// Extract at most `size` random elements from sample and return them
190 190 /// as a vector
191 191 fn limit_sample(
192 192 &mut self,
193 193 mut sample: Vec<Revision>,
194 194 size: usize,
195 195 ) -> Vec<Revision> {
196 196 if !self.randomize {
197 197 sample.sort();
198 198 sample.truncate(size);
199 199 return sample;
200 200 }
201 201 let sample_len = sample.len();
202 202 if sample_len <= size {
203 203 return sample;
204 204 }
205 205 let rng = &mut self.rng;
206 206 let dropped_size = sample_len - size;
207 207 let limited_slice = if size < dropped_size {
208 208 sample.partial_shuffle(rng, size).0
209 209 } else {
210 210 sample.partial_shuffle(rng, dropped_size).1
211 211 };
212 212 limited_slice.to_owned()
213 213 }
214 214
215 215 /// Register revisions known as being common
216 216 pub fn add_common_revisions(
217 217 &mut self,
218 218 common: impl IntoIterator<Item = Revision>,
219 219 ) -> Result<(), GraphError> {
220 220 let before_len = self.common.get_bases().len();
221 221 self.common.add_bases(common);
222 222 if self.common.get_bases().len() == before_len {
223 223 return Ok(());
224 224 }
225 225 if let Some(ref mut undecided) = self.undecided {
226 226 self.common.remove_ancestors_from(undecided)?;
227 227 }
228 228 Ok(())
229 229 }
230 230
    /// Register revisions known as being missing
    ///
    /// Performs a breadth-first walk through the children cache: every
    /// descendant of a missing revision is itself missing, and gets removed
    /// from the undecided set.
    ///
    /// # Performance note
    ///
    /// Except in the most trivial case, the first call of this method has
    /// the side effect of computing `self.undecided` set for the first time,
    /// and the related caches it might need for efficiency of its internal
    /// computation. This is typically faster if more information is
    /// available in `self.common`. Therefore, for good performance, the
    /// caller should avoid calling this too early.
    pub fn add_missing_revisions(
        &mut self,
        missing: impl IntoIterator<Item = Revision>,
    ) -> Result<(), GraphError> {
        let mut tovisit: VecDeque<Revision> = missing.into_iter().collect();
        if tovisit.is_empty() {
            return Ok(());
        }
        self.ensure_children_cache()?;
        self.ensure_undecided()?; // for safety of possible future refactors
        let children = self.children_cache.as_ref().unwrap();
        let mut seen: HashSet<Revision> = HashSet::new();
        let undecided_mut = self.undecided.as_mut().unwrap();
        while let Some(rev) = tovisit.pop_front() {
            if !self.missing.insert(rev) {
                // either it's known to be missing from a previous
                // invocation, and there's no need to iterate on its
                // children (we know they are all missing)
                // or it's from a previous iteration of this loop
                // and its children have already been queued
                continue;
            }
            undecided_mut.remove(&rev);
            match children.get(&rev) {
                None => {
                    continue;
                }
                Some(this_children) => {
                    for child in this_children.iter().cloned() {
                        if seen.insert(child) {
                            tovisit.push_back(child);
                        }
                    }
                }
            }
        }
        Ok(())
    }
279 279
    /// Do we have any information about the peer?
    ///
    /// `true` as soon as at least one common base has been registered.
    pub fn has_info(&self) -> bool {
        self.common.has_bases()
    }
284 284
285 285 /// Did we acquire full knowledge of our Revisions that the peer has?
286 286 pub fn is_complete(&self) -> bool {
287 self.undecided.as_ref().map_or(false, |s| s.is_empty())
287 self.undecided.as_ref().map_or(false, HashSet::is_empty)
288 288 }
289 289
    /// Return the heads of the currently known common set of revisions.
    ///
    /// If the discovery process is not complete (see `is_complete()`), the
    /// caller must be aware that this is an intermediate state.
    ///
    /// On the other hand, if it is complete, then this is currently
    /// the only way to retrieve the end results of the discovery process.
    ///
    /// We may introduce in the future an `into_common_heads` call that
    /// would be more appropriate for normal Rust callers, dropping `self`
    /// if it is complete.
    ///
    /// # Errors
    ///
    /// Propagates any `GraphError` raised while computing the heads.
    pub fn common_heads(&self) -> Result<HashSet<Revision>, GraphError> {
        self.common.bases_heads()
    }
304 304
305 305 /// Force first computation of `self.undecided`
306 306 ///
307 307 /// After this, `self.undecided.as_ref()` and `.as_mut()` can be
308 308 /// unwrapped to get workable immutable or mutable references without
309 309 /// any panic.
310 310 ///
311 311 /// This is an imperative call instead of an access with added lazyness
312 312 /// to reduce easily the scope of mutable borrow for the caller,
313 313 /// compared to undecided(&'a mut self) -> &'a… that would keep it
314 314 /// as long as the resulting immutable one.
315 315 fn ensure_undecided(&mut self) -> Result<(), GraphError> {
316 316 if self.undecided.is_some() {
317 317 return Ok(());
318 318 }
319 319 let tgt = self.target_heads.take().unwrap();
320 320 self.undecided =
321 321 Some(self.common.missing_ancestors(tgt)?.into_iter().collect());
322 322 Ok(())
323 323 }
324 324
325 325 fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
326 326 if self.children_cache.is_some() {
327 327 return Ok(());
328 328 }
329 329 self.ensure_undecided()?;
330 330
331 331 let mut children: FastHashMap<Revision, Vec<Revision>> =
332 332 FastHashMap::default();
333 333 for &rev in self.undecided.as_ref().unwrap() {
334 334 for p in ParentsIterator::graph_parents(&self.graph, rev)? {
335 children.entry(p).or_insert_with(|| Vec::new()).push(rev);
335 children.entry(p).or_insert_with(Vec::new).push(rev);
336 336 }
337 337 }
338 338 self.children_cache = Some(children);
339 339 Ok(())
340 340 }
341 341
342 342 /// Provide statistics about the current state of the discovery process
343 343 pub fn stats(&self) -> DiscoveryStats {
344 344 DiscoveryStats {
345 undecided: self.undecided.as_ref().map(|s| s.len()),
345 undecided: self.undecided.as_ref().map(HashSet::len),
346 346 }
347 347 }
348 348
    /// Return a quick sample of the undecided set, of size at most `size`.
    ///
    /// Starts from the heads of the undecided set and, if that is not
    /// enough, completes the sample by walking towards the roots with
    /// `update_sample`.
    pub fn take_quick_sample(
        &mut self,
        headrevs: impl IntoIterator<Item = Revision>,
        size: usize,
    ) -> Result<Vec<Revision>, GraphError> {
        self.ensure_undecided()?;
        // This block scopes the shared borrow of `self.undecided` so the
        // `&mut self` call to `limit_sample` below is allowed.
        let mut sample = {
            let undecided = self.undecided.as_ref().unwrap();
            if undecided.len() <= size {
                return Ok(undecided.iter().cloned().collect());
            }
            dagops::heads(&self.graph, undecided.iter())?
        };
        if sample.len() >= size {
            return Ok(self.limit_sample(sample.into_iter().collect(), size));
        }
        update_sample(
            None,
            headrevs,
            &mut sample,
            |r| ParentsIterator::graph_parents(&self.graph, r),
            Some(size),
        )?;
        Ok(sample.into_iter().collect())
    }
374 374
    /// Extract a sample from `self.undecided`, going from its heads and roots.
    ///
    /// The `size` parameter is used to avoid useless computations if
    /// it turns out to be bigger than the whole set of undecided Revisions.
    ///
    /// The sample is taken by using `update_sample` from the heads, then
    /// from the roots, working on the reverse DAG,
    /// expressed by `self.children_cache`.
    ///
    /// No effort is being made to complete or limit the sample to `size`
    /// but this method returns another interesting size that it derives
    /// from its knowledge of the structure of the various sets, leaving
    /// to the caller the decision to use it or not.
    fn bidirectional_sample(
        &mut self,
        size: usize,
    ) -> Result<(HashSet<Revision>, usize), GraphError> {
        self.ensure_undecided()?;
        {
            // we don't want to compute children_cache before this
            // but doing it after extracting self.undecided takes a mutable
            // ref to self while a shareable one is still active.
            let undecided = self.undecided.as_ref().unwrap();
            if undecided.len() <= size {
                return Ok((undecided.clone(), size));
            }
        }

        self.ensure_children_cache()?;
        let revs = self.undecided.as_ref().unwrap();
        let mut sample: HashSet<Revision> = revs.clone();

        // it's possible that leveraging the children cache would be more
        // efficient here
        dagops::retain_heads(&self.graph, &mut sample)?;
        let revsheads = sample.clone(); // was again heads(revs) in python

        // update from heads
        update_sample(
            Some(revs),
            revsheads.iter().cloned(),
            &mut sample,
            |r| ParentsIterator::graph_parents(&self.graph, r),
            None,
        )?;

        // update from roots
        let revroots: HashSet<Revision> =
            dagops::roots(&self.graph, revs)?.into_iter().collect();
        // Derived size hint returned to the caller (see doc comment above).
        let prescribed_size = max(size, min(revroots.len(), revsheads.len()));

        let children = self.children_cache.as_ref().unwrap();
        let empty_vec: Vec<Revision> = Vec::new();
        update_sample(
            Some(revs),
            revroots,
            &mut sample,
            |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
            None,
        )?;
        Ok((sample, prescribed_size))
    }
437 437
438 438 /// Fill up sample up to the wished size with random undecided Revisions.
439 439 ///
440 440 /// This is intended to be used as a last resort completion if the
441 441 /// regular sampling algorithm returns too few elements.
442 442 fn random_complete_sample(
443 443 &mut self,
444 444 sample: &mut Vec<Revision>,
445 445 size: usize,
446 446 ) {
447 447 let sample_len = sample.len();
448 448 if size <= sample_len {
449 449 return;
450 450 }
451 451 let take_from: Vec<Revision> = self
452 452 .undecided
453 453 .as_ref()
454 454 .unwrap()
455 455 .iter()
456 456 .filter(|&r| !sample.contains(r))
457 457 .cloned()
458 458 .collect();
459 459 sample.extend(self.limit_sample(take_from, size - sample_len));
460 460 }
461 461
462 462 pub fn take_full_sample(
463 463 &mut self,
464 464 size: usize,
465 465 ) -> Result<Vec<Revision>, GraphError> {
466 466 let (sample_set, prescribed_size) = self.bidirectional_sample(size)?;
467 467 let size = if self.respect_size {
468 468 size
469 469 } else {
470 470 prescribed_size
471 471 };
472 472 let mut sample =
473 473 self.limit_sample(sample_set.into_iter().collect(), size);
474 474 self.random_complete_sample(&mut sample, size);
475 475 Ok(sample)
476 476 }
477 477 }
478 478
479 479 #[cfg(test)]
480 480 mod tests {
481 481 use super::*;
482 482 use crate::testing::SampleGraph;
483 483
    /// A PartialDiscovery as for pushing all the heads of `SampleGraph`
    ///
    /// To avoid actual randomness in these tests, we give it a fixed
    /// random seed, but by default we'll test the random version.
    fn full_disco() -> PartialDiscovery<SampleGraph> {
        PartialDiscovery::new_with_seed(
            SampleGraph,
            vec![10, 11, 12, 13],
            [0; 16], // fixed all-zeros seed for reproducibility
            true,    // respect_size
            true,    // randomize (with the fixed seed above)
        )
    }
497 497
    /// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
    ///
    /// To avoid actual randomness in tests, we give it a fixed random seed.
    fn disco12() -> PartialDiscovery<SampleGraph> {
        PartialDiscovery::new_with_seed(
            SampleGraph,
            vec![12],
            [0; 16], // fixed all-zeros seed for reproducibility
            true,    // respect_size
            true,    // randomize (with the fixed seed above)
        )
    }
510 510
511 511 fn sorted_undecided(
512 512 disco: &PartialDiscovery<SampleGraph>,
513 513 ) -> Vec<Revision> {
514 514 let mut as_vec: Vec<Revision> =
515 515 disco.undecided.as_ref().unwrap().iter().cloned().collect();
516 516 as_vec.sort();
517 517 as_vec
518 518 }
519 519
520 520 fn sorted_missing(disco: &PartialDiscovery<SampleGraph>) -> Vec<Revision> {
521 521 let mut as_vec: Vec<Revision> =
522 522 disco.missing.iter().cloned().collect();
523 523 as_vec.sort();
524 524 as_vec
525 525 }
526 526
527 527 fn sorted_common_heads(
528 528 disco: &PartialDiscovery<SampleGraph>,
529 529 ) -> Result<Vec<Revision>, GraphError> {
530 530 let mut as_vec: Vec<Revision> =
531 531 disco.common_heads()?.iter().cloned().collect();
532 532 as_vec.sort();
533 533 Ok(as_vec)
534 534 }
535 535
    #[test]
    fn test_add_common_get_undecided() -> Result<(), GraphError> {
        let mut disco = full_disco();
        // Fresh object: no undecided set yet, no peer info, empty stats.
        assert_eq!(disco.undecided, None);
        assert!(!disco.has_info());
        assert_eq!(disco.stats().undecided, None);

        disco.add_common_revisions(vec![11, 12])?;
        assert!(disco.has_info());
        assert!(!disco.is_complete());
        assert!(disco.missing.is_empty());

        // add_common_revisions did not trigger a premature computation
        // of `undecided`, let's check that and ask for them
        assert_eq!(disco.undecided, None);
        disco.ensure_undecided()?;
        assert_eq!(sorted_undecided(&disco), vec![5, 8, 10, 13]);
        assert_eq!(disco.stats().undecided, Some(4));
        Ok(())
    }
556 556
/// in this test, we pretend that our peer misses exactly (8+10)::
/// and we're comparing all our repo to it (as in a bare push)
#[test]
fn test_discovery() -> Result<(), GraphError> {
    let mut disco = full_disco();
    disco.add_common_revisions(vec![11, 12])?;
    disco.add_missing_revisions(vec![8, 10])?;
    assert_eq!(sorted_undecided(&disco), vec![5]);
    assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
    assert!(!disco.is_complete());

    // Learning that 5 is common settles the last undecided revision.
    disco.add_common_revisions(vec![5])?;
    assert!(sorted_undecided(&disco).is_empty());
    assert_eq!(sorted_missing(&disco), vec![8, 10, 13]);
    assert!(disco.is_complete());
    assert_eq!(sorted_common_heads(&disco)?, vec![5, 11, 12]);
    Ok(())
}
575 575
#[test]
fn test_add_missing_early_continue() -> Result<(), GraphError> {
    // Removed a leftover debug `eprintln!` whose message
    // ("test_add_missing_early_stop") did not even match this test's name.
    let mut disco = full_disco();
    disco.add_common_revisions(vec![13, 3, 4])?;
    disco.ensure_children_cache()?;
    // 12 is grand-child of 6 through 9
    // passing them in this order maximizes the chances of the
    // early continue to do the wrong thing
    disco.add_missing_revisions(vec![6, 9, 12])?;
    assert_eq!(sorted_undecided(&disco), vec![5, 7, 10, 11]);
    assert_eq!(sorted_missing(&disco), vec![6, 9, 12]);
    assert!(!disco.is_complete());
    Ok(())
}
591 591
#[test]
fn test_limit_sample_no_need_to() {
    // A sample already below the size limit is returned unchanged.
    let input = vec![1, 2, 3, 4];
    let expected = input.clone();
    assert_eq!(full_disco().limit_sample(input, 10), expected);
}
597 597
#[test]
fn test_limit_sample_less_than_half() {
    let revs = (1..6).collect::<Vec<_>>();
    assert_eq!(full_disco().limit_sample(revs, 2), vec![2, 5]);
}
602 602
#[test]
fn test_limit_sample_more_than_half() {
    let revs = (1..4).collect::<Vec<_>>();
    assert_eq!(full_disco().limit_sample(revs, 2), vec![1, 2]);
}
607 607
#[test]
fn test_limit_sample_no_random() {
    // With randomization disabled the truncation is deterministic.
    let mut disco = full_disco();
    disco.randomize = false;
    let limited = disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4);
    assert_eq!(limited, vec![1, 3, 5, 7]);
}
617 617
#[test]
fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
    let mut disco = full_disco();
    disco.undecided = Some((1..=13).collect());

    let mut sample = disco.take_quick_sample(vec![], 4)?;
    sample.sort();
    assert_eq!(sample, vec![10, 11, 12, 13]);
    Ok(())
}
628 628
#[test]
fn test_quick_sample_climbing_from_12() -> Result<(), GraphError> {
    let mut disco = disco12();
    disco.ensure_undecided()?;

    let mut sample = disco.take_quick_sample(vec![12], 4)?;
    sample.sort();
    // r12's only parent is r9, whose unique grand-parent through the
    // diamond shape is r4. This ends there because the distance from r4
    // to the root is only 3.
    assert_eq!(sample, vec![4, 9, 12]);
    Ok(())
}
642 642
#[test]
fn test_children_cache() -> Result<(), GraphError> {
    let mut disco = full_disco();
    disco.ensure_children_cache()?;
    let cache = disco.children_cache.unwrap();

    assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
    // Heads have no children at all.
    assert!(cache.get(&10).is_none());

    let sorted_children = |rev| {
        let mut children = cache.get(&rev).cloned().unwrap();
        children.sort();
        children
    };
    assert_eq!(sorted_children(4), vec![5, 6, 7]);
    assert_eq!(sorted_children(7), vec![9, 11]);

    Ok(())
}
662 662
#[test]
fn test_complete_sample() {
    let mut disco = full_disco();
    let undecided: HashSet<Revision> =
        [4, 7, 9, 2, 3].iter().cloned().collect();
    disco.undecided = Some(undecided);

    // A too-small sample is grown up to the requested size...
    let mut sample = vec![0];
    disco.random_complete_sample(&mut sample, 3);
    assert_eq!(sample.len(), 3);

    // ...and an already large enough sample is left untouched.
    let mut sample = vec![2, 4, 7];
    disco.random_complete_sample(&mut sample, 1);
    assert_eq!(sample.len(), 3);
}
678 678
#[test]
fn test_bidirectional_sample() -> Result<(), GraphError> {
    let mut disco = full_disco();
    // `0..=13` is already an iterator: the former `.into_iter()` call was
    // a no-op (clippy pass).
    disco.undecided = Some((0..=13).collect());

    let (sample_set, size) = disco.bidirectional_sample(7)?;
    assert_eq!(size, 7);
    let mut sample: Vec<Revision> = sample_set.into_iter().collect();
    sample.sort();
    // our DAG is a bit too small for the results to be really interesting
    // at least it shows that
    // - we went both ways
    // - we didn't take all Revisions (6 is not in the sample)
    assert_eq!(sample, vec![0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13]);
    Ok(())
}
695 695 }
@@ -1,669 +1,670 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::fs::File;
21 21 use std::io::Read;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24 use std::vec::Vec;
25 25
26 26 lazy_static! {
27 27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 30 for byte in to_escape {
31 31 v[*byte as usize].insert(0, b'\\');
32 32 }
33 33 v
34 34 };
35 35 }
36 36
/// These are matched in order
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];

/// Appended to the regexp of globs
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";

/// The syntax a pattern is written in; decides how it is compiled into a
/// regular expression (see `_build_single_regex`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// a path relative to repository root, which is matched recursively
    Path,
    /// A path relative to cwd
    RelPath,
    /// an unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFiles,
    /// A file of patterns to read and include
    Include,
    /// A file of patterns to match against files under the same directory
    SubInclude,
}
69 69
/// Transforms a glob pattern into a regex
///
/// Supports `*` / `**` / `*/` (via `GLOB_REPLACEMENTS`), `?`, `[...]`
/// character classes (with leading `!` negation), `{a,b}` alternation
/// groups and `\` escaping; every other byte is escaped via `RE_ESCAPE`.
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
    let mut input = pat;
    let mut res: Vec<u8> = vec![];
    // Depth of currently open `{...}` groups; `}` and `,` are only special
    // while inside at least one group.
    let mut group_depth = 0;

    while let Some((c, rest)) = input.split_first() {
        input = rest;

        match c {
            b'*' => {
                // Replacements are tried in order: `*/`, then `*` (which
                // also covers `**`), then the bare-`*` fallback.
                for (source, repl) in GLOB_REPLACEMENTS {
                    if let Some(rest) = input.drop_prefix(source) {
                        input = rest;
                        res.extend(*repl);
                        break;
                    }
                }
            }
            b'?' => res.extend(b"."),
            b'[' => {
                // Find the closing `]`, skipping the first byte so that a
                // class starting with `]` treats it as a member.
                match input.iter().skip(1).position(|b| *b == b']') {
                    // Unterminated class: emit a literal `[`.
                    None => res.extend(b"\\["),
                    Some(end) => {
                        // Account for the one we skipped
                        let end = end + 1;

                        res.extend(b"[");

                        for (i, b) in input[..end].iter().enumerate() {
                            if *b == b'!' && i == 0 {
                                // Glob negation -> regex class negation
                                res.extend(b"^")
                            } else if *b == b'^' && i == 0 {
                                // A leading literal `^` must not negate
                                res.extend(b"\\^")
                            } else if *b == b'\\' {
                                res.extend(b"\\\\")
                            } else {
                                res.push(*b)
                            }
                        }
                        res.extend(b"]");
                        input = &input[end + 1..];
                    }
                }
            }
            b'{' => {
                group_depth += 1;
                res.extend(b"(?:")
            }
            b'}' if group_depth > 0 => {
                group_depth -= 1;
                res.extend(b")");
            }
            b',' if group_depth > 0 => res.extend(b"|"),
            b'\\' => {
                // Escape the following byte; a trailing `\` escapes itself.
                let c = {
                    if let Some((c, rest)) = input.split_first() {
                        input = rest;
                        c
                    } else {
                        c
                    }
                };
                res.extend(&RE_ESCAPE[*c as usize])
            }
            _ => res.extend(&RE_ESCAPE[*c as usize]),
        }
    }
    res
}
140 140
141 141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 142 pattern
143 143 .iter()
144 144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 145 .collect()
146 146 }
147 147
148 148 pub fn parse_pattern_syntax(
149 149 kind: &[u8],
150 150 ) -> Result<PatternSyntax, PatternError> {
151 151 match kind {
152 152 b"re:" => Ok(PatternSyntax::Regexp),
153 153 b"path:" => Ok(PatternSyntax::Path),
154 154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 158 b"glob:" => Ok(PatternSyntax::Glob),
159 159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 160 b"include:" => Ok(PatternSyntax::Include),
161 161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 162 _ => Err(PatternError::UnsupportedSyntax(
163 163 String::from_utf8_lossy(kind).to_string(),
164 164 )),
165 165 }
166 166 }
167 167
/// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
///
/// An empty result means "match everything" to the caller.
fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
    let IgnorePattern {
        syntax, pattern, ..
    } = entry;
    if pattern.is_empty() {
        return vec![];
    }
    match syntax {
        PatternSyntax::Regexp => pattern.to_owned(),
        PatternSyntax::RelRegexp => {
            // The `regex` crate accepts `**` while `re2` and Python's `re`
            // do not. Checking for `*` correctly triggers the same error in
            // all engines.
            if pattern[0] == b'^'
                || pattern[0] == b'*'
                || pattern.starts_with(b".*")
            {
                return pattern.to_owned();
            }
            // Unanchored regexps get an explicit `.*` prefix.
            [&b".*"[..], pattern].concat()
        }
        PatternSyntax::Path | PatternSyntax::RelPath => {
            // "." denotes the repository root: match everything.
            if pattern == b"." {
                return vec![];
            }
            // Match the path itself or anything below it.
            [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
        }
        PatternSyntax::RootFiles => {
            let mut res = if pattern == b"." {
                vec![]
            } else {
                // Pattern is a directory name.
                [escape_pattern(pattern).as_slice(), b"/"].concat()
            };

            // Anything after the pattern must be a non-directory.
            res.extend(b"[^/]+$");
            res
        }
        PatternSyntax::RelGlob => {
            let glob_re = glob_to_re(pattern);
            if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
                // A glob starting with `*` may match at any depth.
                [b".*", rest, GLOB_SUFFIX].concat()
            } else {
                [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
            }
        }
        PatternSyntax::Glob | PatternSyntax::RootGlob => {
            [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
        }
        // Filtered out by the `build_single_regex` wrapper before this point.
        PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
    }
}
224 224
/// Bytes that make a glob non-literal; used by `build_single_regex` to
/// short-circuit "exact" root globs that need no regex at all.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227 227
/// Normalizes a byte path: collapses repeated separators, removes `.`
/// components, resolves `..` where possible, and keeps POSIX's special
/// double-slash root ("//a" stays "//a", "///a" becomes "/a").
/// An empty or fully-collapsed path becomes ".".
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    if bytes.is_empty() {
        return b".".to_vec();
    }
    let sep = b'/';

    let mut leading = bytes.iter().take_while(|b| **b == sep).count();
    if leading > 2 {
        // POSIX allows one or two initial slashes, but treats three or more
        // as single slash.
        leading = 1;
    }

    let mut parts: Vec<&[u8]> = vec![];
    for part in bytes.split(|b| *b == sep) {
        if part.is_empty() || part == b"." {
            continue;
        }
        // `..` is kept only when it cannot cancel a previous component.
        let keep = part != b".."
            || (leading == 0 && parts.is_empty())
            || (!parts.is_empty() && parts[parts.len() - 1] == b"..");
        if keep {
            parts.push(part);
        } else if !parts.is_empty() {
            parts.pop();
        }
    }

    // Re-assemble: leading slashes first, then the joined components.
    let mut normalized: Vec<u8> = (0..leading).map(|_| sep).collect();
    normalized.extend(parts.join(&sep));

    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}
269 269
270 270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 271 /// that don't need to be transformed into a regex.
272 272 pub fn build_single_regex(
273 273 entry: &IgnorePattern,
274 274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 275 let IgnorePattern {
276 276 pattern, syntax, ..
277 277 } = entry;
278 278 let pattern = match syntax {
279 279 PatternSyntax::RootGlob
280 280 | PatternSyntax::Path
281 281 | PatternSyntax::RelGlob
282 282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 283 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 284 return Err(PatternError::NonRegexPattern(entry.clone()))
285 285 }
286 286 _ => pattern.to_owned(),
287 287 };
288 288 if *syntax == PatternSyntax::RootGlob
289 289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 290 {
291 291 Ok(None)
292 292 } else {
293 293 let mut entry = entry.clone();
294 294 entry.pattern = pattern;
295 295 Ok(Some(_build_single_regex(&entry)))
296 296 }
297 297 }
298 298
lazy_static! {
    // Maps bare `syntax: <name>` directive values from pattern files to
    // the corresponding per-line prefix form (the relative variants).
    static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
        let mut m = FastHashMap::default();

        m.insert(b"re".as_ref(), b"relre:".as_ref());
        m.insert(b"regexp".as_ref(), b"relre:".as_ref());
        m.insert(b"glob".as_ref(), b"relglob:".as_ref());
        m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
        m.insert(b"include".as_ref(), b"include:".as_ref());
        m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
        m
    };
}
312 312
/// Non-fatal problems encountered while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path
    NoSuchFile(PathBuf),
}
320 320
321 321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
322 322 lines: &[u8],
323 323 file_path: P,
324 324 warn: bool,
325 325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
326 326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
327
328 #[allow(clippy::trivial_regex)]
327 329 let comment_escape_regex = Regex::new(r"\\#").unwrap();
328 330 let mut inputs: Vec<IgnorePattern> = vec![];
329 331 let mut warnings: Vec<PatternFileWarning> = vec![];
330 332
331 333 let mut current_syntax = b"relre:".as_ref();
332 334
333 335 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
334 336 let line_number = line_number + 1;
335 337
336 338 let line_buf;
337 339 if line.contains(&b'#') {
338 340 if let Some(cap) = comment_regex.captures(line) {
339 341 line = &line[..cap.get(1).unwrap().end()]
340 342 }
341 343 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
342 344 line = &line_buf;
343 345 }
344 346
345 347 let mut line = line.trim_end();
346 348
347 349 if line.is_empty() {
348 350 continue;
349 351 }
350 352
351 353 if let Some(syntax) = line.drop_prefix(b"syntax:") {
352 354 let syntax = syntax.trim();
353 355
354 356 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
355 357 current_syntax = rel_syntax;
356 358 } else if warn {
357 359 warnings.push(PatternFileWarning::InvalidSyntax(
358 360 file_path.as_ref().to_owned(),
359 361 syntax.to_owned(),
360 362 ));
361 363 }
362 364 continue;
363 365 }
364 366
365 367 let mut line_syntax: &[u8] = &current_syntax;
366 368
367 369 for (s, rels) in SYNTAXES.iter() {
368 370 if let Some(rest) = line.drop_prefix(rels) {
369 371 line_syntax = rels;
370 372 line = rest;
371 373 break;
372 374 }
373 375 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
374 376 line_syntax = rels;
375 377 line = rest;
376 378 break;
377 379 }
378 380 }
379 381
380 382 inputs.push(IgnorePattern::new(
381 383 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
382 384 PatternError::UnsupportedSyntax(syntax) => {
383 385 PatternError::UnsupportedSyntaxInFile(
384 386 syntax,
385 387 file_path.as_ref().to_string_lossy().into(),
386 388 line_number,
387 389 )
388 390 }
389 391 _ => e,
390 392 })?,
391 393 &line,
392 394 &file_path,
393 395 ));
394 396 }
395 397 Ok((inputs, warnings))
396 398 }
397 399
398 400 pub fn read_pattern_file<P: AsRef<Path>>(
399 401 file_path: P,
400 402 warn: bool,
401 403 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
402 404 let mut f = match File::open(file_path.as_ref()) {
403 405 Ok(f) => Ok(f),
404 406 Err(e) => match e.kind() {
405 407 std::io::ErrorKind::NotFound => {
406 408 return Ok((
407 409 vec![],
408 410 vec![PatternFileWarning::NoSuchFile(
409 411 file_path.as_ref().to_owned(),
410 412 )],
411 413 ))
412 414 }
413 415 _ => Err(e),
414 416 },
415 417 }?;
416 418 let mut contents = Vec::new();
417 419
418 420 f.read_to_end(&mut contents)?;
419 421
420 422 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
421 423 }
422 424
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// How `pattern` should be interpreted (glob, regexp, path...)
    pub syntax: PatternSyntax,
    /// The pattern bytes themselves, without any syntax prefix
    pub pattern: Vec<u8>,
    /// The file this entry was read from, kept for error reporting
    pub source: PathBuf,
}
430 432
431 433 impl IgnorePattern {
432 434 pub fn new(
433 435 syntax: PatternSyntax,
434 436 pattern: &[u8],
435 437 source: impl AsRef<Path>,
436 438 ) -> Self {
437 439 Self {
438 440 syntax,
439 441 pattern: pattern.to_owned(),
440 442 source: source.as_ref().to_owned(),
441 443 }
442 444 }
443 445 }
444 446
/// Shorthand for results that fail with a `PatternError`.
pub type PatternResult<T> = Result<T, PatternError>;
446 448
447 449 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
448 450 /// patterns.
449 451 ///
450 452 /// `subinclude:` is not treated as a special pattern here: unraveling them
451 453 /// needs to occur in the "ignore" phase.
452 454 pub fn get_patterns_from_file(
453 455 pattern_file: impl AsRef<Path>,
454 456 root_dir: impl AsRef<Path>,
455 457 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
456 458 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
457 459 let patterns = patterns
458 460 .into_iter()
459 461 .flat_map(|entry| -> PatternResult<_> {
460 462 let IgnorePattern {
461 syntax,
462 pattern,
463 source: _,
463 syntax, pattern, ..
464 464 } = &entry;
465 465 Ok(match syntax {
466 466 PatternSyntax::Include => {
467 467 let inner_include =
468 468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
469 469 let (inner_pats, inner_warnings) = get_patterns_from_file(
470 470 &inner_include,
471 471 root_dir.as_ref(),
472 472 )?;
473 473 warnings.extend(inner_warnings);
474 474 inner_pats
475 475 }
476 476 _ => vec![entry],
477 477 })
478 478 })
479 479 .flatten()
480 480 .collect();
481 481
482 482 Ok((patterns, warnings))
483 483 }
484 484
/// Holds all the information needed to handle a `subinclude:` pattern.
pub struct SubInclude {
    /// Will be used for repository (hg) paths that start with this prefix.
    /// It is relative to the current working directory, so comparing against
    /// repository paths is painless.
    pub prefix: HgPathBuf,
    /// The file itself, containing the patterns
    pub path: PathBuf,
    /// Folder in the filesystem where this applies
    pub root: PathBuf,
}
496 496
impl SubInclude {
    /// Builds a `SubInclude` from a `subinclude:` pattern found in `source`,
    /// resolving the included file and its prefix relative to `root_dir`.
    pub fn new(
        root_dir: impl AsRef<Path>,
        pattern: &[u8],
        source: impl AsRef<Path>,
    ) -> Result<SubInclude, HgPathError> {
        let normalized_source =
            normalize_path_bytes(&get_bytes_from_path(source));

        // Directory containing the file the pattern came from; relative
        // sub-include paths are resolved against it.
        let source_root = get_path_from_bytes(&normalized_source);
        let source_root =
            source_root.parent().unwrap_or_else(|| source_root.deref());

        let path = source_root.join(get_path_from_bytes(pattern));
        let new_root = path.parent().unwrap_or_else(|| path.deref());

        let prefix = canonical_path(&root_dir, &root_dir, new_root)?;

        Ok(Self {
            // Make the prefix directory-like (trailing slash) so repository
            // paths can be matched by simple prefix comparison.
            prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
                if !p.is_empty() {
                    p.push(b'/');
                }
                Ok(p)
            })?,
            path: path.to_owned(),
            root: new_root.to_owned(),
        })
    }
}
526 527
527 528 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 529 /// phase.
529 530 pub fn filter_subincludes(
530 531 ignore_patterns: &[IgnorePattern],
531 532 root_dir: impl AsRef<Path>,
532 533 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 534 let mut subincludes = vec![];
534 535 let mut others = vec![];
535 536
536 537 for ignore_pattern in ignore_patterns.iter() {
537 538 let IgnorePattern {
538 539 syntax,
539 540 pattern,
540 541 source,
541 542 } = ignore_pattern;
542 543 if *syntax == PatternSyntax::SubInclude {
543 544 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 545 } else {
545 546 others.push(ignore_pattern)
546 547 }
547 548 }
548 549 Ok((subincludes, others))
549 550 }
550 551
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn escape_pattern_test() {
        // Bytes with no regex meaning must pass through unchanged.
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax:` directive lines produce no patterns on their own.
        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![]
        );
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"rust/target/",
                Path::new("")
            ))
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::Regexp,
                br"rust/target/\d+",
                Path::new("")
            ))
            .unwrap(),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // Literal root globs come back as `None`: the caller can then use
        // plain string comparison instead of a regex.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"whatever",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"*.o",
                Path::new("")
            ))
            .unwrap(),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}
@@ -1,937 +1,937 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use crate::{
11 11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 12 filepatterns::{
13 13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 14 PatternFileWarning, PatternResult, SubInclude,
15 15 },
16 16 utils::{
17 17 files::find_dirs,
18 18 hg_path::{HgPath, HgPathBuf},
19 19 Escaped,
20 20 },
21 21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 22 PatternSyntax,
23 23 };
24 24
25 25 use crate::filepatterns::normalize_path_bytes;
26 26 use std::borrow::ToOwned;
27 27 use std::collections::HashSet;
28 28 use std::fmt::{Display, Error, Formatter};
29 29 use std::iter::FromIterator;
30 30 use std::ops::Deref;
31 31 use std::path::{Path, PathBuf};
32 32
33 33 use micro_timer::timed;
34 34
35 35 #[derive(Debug, PartialEq)]
36 36 pub enum VisitChildrenSet<'a> {
37 37 /// Don't visit anything
38 38 Empty,
39 39 /// Only visit this directory
40 40 This,
41 41 /// Visit this directory and these subdirectories
42 42 /// TODO Should we implement a `NonEmptyHashSet`?
43 43 Set(HashSet<&'a HgPath>),
44 44 /// Visit this directory and all subdirectories
45 45 Recursive,
46 46 }
47 47
48 48 pub trait Matcher {
49 49 /// Explicitly listed files
50 50 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
51 51 /// Returns whether `filename` is in `file_set`
52 52 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
53 53 /// Returns whether `filename` is matched by this matcher
54 54 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
55 55 /// Decides whether a directory should be visited based on whether it
56 56 /// has potential matches in it or one of its subdirectories, and
57 57 /// potentially lists which subdirectories of that directory should be
58 58 /// visited. This is based on the match's primary, included, and excluded
59 59 /// patterns.
60 60 ///
61 61 /// # Example
62 62 ///
63 63 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
64 64 /// return the following values (assuming the implementation of
65 65 /// visit_children_set is capable of recognizing this; some implementations
66 66 /// are not).
67 67 ///
68 68 /// ```text
69 69 /// ```ignore
70 70 /// '' -> {'foo', 'qux'}
71 71 /// 'baz' -> set()
72 72 /// 'foo' -> {'bar'}
73 73 /// // Ideally this would be `Recursive`, but since the prefix nature of
74 74 /// // matchers is applied to the entire matcher, we have to downgrade this
75 75 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
76 76 /// // `RootFilesIn'-kind matcher being mixed in.
77 77 /// 'foo/bar' -> 'this'
78 78 /// 'qux' -> 'this'
79 79 /// ```
80 80 /// # Important
81 81 ///
82 82 /// Most matchers do not know if they're representing files or
83 83 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
84 84 /// file or a directory, so `visit_children_set('dir')` for most matchers
85 85 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
86 86 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
87 87 /// it may return `VisitChildrenSet::This`.
88 88 /// Do not rely on the return being a `HashSet` indicating that there are
89 89 /// no files in this dir to investigate (or equivalently that if there are
90 90 /// files to investigate in 'dir' that it will always return
91 91 /// `VisitChildrenSet::This`).
92 92 fn visit_children_set(
93 93 &self,
94 94 directory: impl AsRef<HgPath>,
95 95 ) -> VisitChildrenSet;
96 96 /// Matcher will match everything and `files_set()` will be empty:
97 97 /// optimization might be possible.
98 98 fn matches_everything(&self) -> bool;
99 99 /// Matcher will match exactly the files in `files_set()`: optimization
100 100 /// might be possible.
101 101 fn is_exact(&self) -> bool;
102 102 }
103 103
104 104 /// Matches everything.
105 105 ///```
106 106 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
107 107 ///
108 108 /// let matcher = AlwaysMatcher;
109 109 ///
110 110 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
111 111 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
112 112 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
113 113 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
114 114 /// ```
115 115 #[derive(Debug)]
116 116 pub struct AlwaysMatcher;
117 117
118 118 impl Matcher for AlwaysMatcher {
119 119 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
120 120 None
121 121 }
122 122 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
123 123 false
124 124 }
125 125 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
126 126 true
127 127 }
128 128 fn visit_children_set(
129 129 &self,
130 130 _directory: impl AsRef<HgPath>,
131 131 ) -> VisitChildrenSet {
132 132 VisitChildrenSet::Recursive
133 133 }
134 134 fn matches_everything(&self) -> bool {
135 135 true
136 136 }
137 137 fn is_exact(&self) -> bool {
138 138 false
139 139 }
140 140 }
141 141
142 142 /// Matches the input files exactly. They are interpreted as paths, not
143 143 /// patterns.
144 144 ///
145 145 ///```
146 146 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
147 147 ///
148 148 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
149 149 /// let matcher = FileMatcher::new(&files).unwrap();
150 150 ///
151 151 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
152 152 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
153 153 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
154 154 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
155 155 /// ```
156 156 #[derive(Debug)]
157 157 pub struct FileMatcher<'a> {
158 158 files: HashSet<&'a HgPath>,
159 159 dirs: DirsMultiset,
160 160 }
161 161
162 162 impl<'a> FileMatcher<'a> {
163 163 pub fn new(
164 164 files: &'a [impl AsRef<HgPath>],
165 165 ) -> Result<Self, DirstateMapError> {
166 166 Ok(Self {
167 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
167 files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
168 168 dirs: DirsMultiset::from_manifest(files)?,
169 169 })
170 170 }
171 171 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
172 172 self.files.contains(filename.as_ref())
173 173 }
174 174 }
175 175
176 176 impl<'a> Matcher for FileMatcher<'a> {
177 177 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
178 178 Some(&self.files)
179 179 }
180 180 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
181 181 self.inner_matches(filename)
182 182 }
183 183 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
184 184 self.inner_matches(filename)
185 185 }
186 186 fn visit_children_set(
187 187 &self,
188 188 directory: impl AsRef<HgPath>,
189 189 ) -> VisitChildrenSet {
190 190 if self.files.is_empty() || !self.dirs.contains(&directory) {
191 191 return VisitChildrenSet::Empty;
192 192 }
193 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
193 let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
194 194
195 195 let mut candidates: HashSet<&HgPath> =
196 self.files.union(&dirs_as_set).map(|k| *k).collect();
196 self.files.union(&dirs_as_set).cloned().collect();
197 197 candidates.remove(HgPath::new(b""));
198 198
199 199 if !directory.as_ref().is_empty() {
200 200 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
201 201 candidates = candidates
202 202 .iter()
203 203 .filter_map(|c| {
204 204 if c.as_bytes().starts_with(&directory) {
205 205 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
206 206 } else {
207 207 None
208 208 }
209 209 })
210 210 .collect();
211 211 }
212 212
213 213 // `self.dirs` includes all of the directories, recursively, so if
214 214 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
215 215 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
216 216 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
217 217 // subdir will be in there without a slash.
218 218 VisitChildrenSet::Set(
219 219 candidates
220 220 .iter()
221 221 .filter_map(|c| {
222 222 if c.bytes().all(|b| *b != b'/') {
223 223 Some(*c)
224 224 } else {
225 225 None
226 226 }
227 227 })
228 228 .collect(),
229 229 )
230 230 }
231 231 fn matches_everything(&self) -> bool {
232 232 false
233 233 }
234 234 fn is_exact(&self) -> bool {
235 235 true
236 236 }
237 237 }
238 238
239 239 /// Matches files that are included in the ignore rules.
240 240 /// ```
241 241 /// use hg::{
242 242 /// matchers::{IncludeMatcher, Matcher},
243 243 /// IgnorePattern,
244 244 /// PatternSyntax,
245 245 /// utils::hg_path::HgPath
246 246 /// };
247 247 /// use std::path::Path;
248 248 /// ///
249 249 /// let ignore_patterns =
250 250 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
251 251 /// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
252 252 /// ///
253 253 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
254 254 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
255 255 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
256 256 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
257 257 /// ```
258 258 pub struct IncludeMatcher<'a> {
259 259 patterns: Vec<u8>,
260 260 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
261 261 /// Whether all the patterns match a prefix (i.e. recursively)
262 262 prefix: bool,
263 263 roots: HashSet<HgPathBuf>,
264 264 dirs: HashSet<HgPathBuf>,
265 265 parents: HashSet<HgPathBuf>,
266 266 }
267 267
268 268 impl<'a> Matcher for IncludeMatcher<'a> {
269 269 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
270 270 None
271 271 }
272 272
273 273 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
274 274 false
275 275 }
276 276
277 277 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
278 278 (self.match_fn)(filename.as_ref())
279 279 }
280 280
281 281 fn visit_children_set(
282 282 &self,
283 283 directory: impl AsRef<HgPath>,
284 284 ) -> VisitChildrenSet {
285 285 let dir = directory.as_ref();
286 286 if self.prefix && self.roots.contains(dir) {
287 287 return VisitChildrenSet::Recursive;
288 288 }
289 289 if self.roots.contains(HgPath::new(b""))
290 290 || self.roots.contains(dir)
291 291 || self.dirs.contains(dir)
292 292 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
293 293 {
294 294 return VisitChildrenSet::This;
295 295 }
296 296
297 297 if self.parents.contains(directory.as_ref()) {
298 298 let multiset = self.get_all_parents_children();
299 299 if let Some(children) = multiset.get(dir) {
300 300 return VisitChildrenSet::Set(children.to_owned());
301 301 }
302 302 }
303 303 VisitChildrenSet::Empty
304 304 }
305 305
306 306 fn matches_everything(&self) -> bool {
307 307 false
308 308 }
309 309
310 310 fn is_exact(&self) -> bool {
311 311 false
312 312 }
313 313 }
314 314
315 315 /// Returns a function that matches an `HgPath` against the given regex
316 316 /// pattern.
317 317 ///
318 318 /// This can fail when the pattern is invalid or not supported by the
319 319 /// underlying engine (the `regex` crate), for instance anything with
320 320 /// back-references.
321 321 #[timed]
322 322 fn re_matcher(
323 323 pattern: &[u8],
324 324 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
325 325 use std::io::Write;
326 326
327 327 // The `regex` crate adds `.*` to the start and end of expressions if there
328 328 // are no anchors, so add the start anchor.
329 329 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
330 330 for byte in pattern {
331 331 if *byte > 127 {
332 332 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
333 333 } else {
334 334 escaped_bytes.push(*byte);
335 335 }
336 336 }
337 337 escaped_bytes.push(b')');
338 338
339 339 // Avoid the cost of UTF8 checking
340 340 //
341 341 // # Safety
342 342 // This is safe because we escaped all non-ASCII bytes.
343 343 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
344 344 let re = regex::bytes::RegexBuilder::new(&pattern_string)
345 345 .unicode(false)
346 346 // Big repos with big `.hgignore` will hit the default limit and
347 347 // incur a significant performance hit. One repo's `hg status` hit
348 348 // multiple *minutes*.
349 349 .dfa_size_limit(50 * (1 << 20))
350 350 .build()
351 351 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
352 352
353 353 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
354 354 }
355 355
356 356 /// Returns the regex pattern and a function that matches an `HgPath` against
357 357 /// said regex formed by the given ignore patterns.
358 358 fn build_regex_match<'a>(
359 359 ignore_patterns: &'a [&'a IgnorePattern],
360 360 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
361 361 let mut regexps = vec![];
362 362 let mut exact_set = HashSet::new();
363 363
364 364 for pattern in ignore_patterns {
365 365 if let Some(re) = build_single_regex(pattern)? {
366 366 regexps.push(re);
367 367 } else {
368 368 let exact = normalize_path_bytes(&pattern.pattern);
369 369 exact_set.insert(HgPathBuf::from_bytes(&exact));
370 370 }
371 371 }
372 372
373 373 let full_regex = regexps.join(&b'|');
374 374
375 375 // An empty pattern would cause the regex engine to incorrectly match the
376 376 // (empty) root directory
377 377 let func = if !(regexps.is_empty()) {
378 378 let matcher = re_matcher(&full_regex)?;
379 379 let func = move |filename: &HgPath| {
380 380 exact_set.contains(filename) || matcher(filename)
381 381 };
382 382 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
383 383 } else {
384 384 let func = move |filename: &HgPath| exact_set.contains(filename);
385 385 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
386 386 };
387 387
388 388 Ok((full_regex, func))
389 389 }
390 390
391 391 /// Returns roots and directories corresponding to each pattern.
392 392 ///
393 393 /// This calculates the roots and directories exactly matching the patterns and
394 394 /// returns a tuple of (roots, dirs). It does not return other directories
395 395 /// which may also need to be considered, like the parent directories.
396 396 fn roots_and_dirs(
397 397 ignore_patterns: &[IgnorePattern],
398 398 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
399 399 let mut roots = Vec::new();
400 400 let mut dirs = Vec::new();
401 401
402 402 for ignore_pattern in ignore_patterns {
403 403 let IgnorePattern {
404 404 syntax, pattern, ..
405 405 } = ignore_pattern;
406 406 match syntax {
407 407 PatternSyntax::RootGlob | PatternSyntax::Glob => {
408 408 let mut root = vec![];
409 409
410 410 for p in pattern.split(|c| *c == b'/') {
411 411 if p.iter().any(|c| match *c {
412 412 b'[' | b'{' | b'*' | b'?' => true,
413 413 _ => false,
414 414 }) {
415 415 break;
416 416 }
417 417 root.push(HgPathBuf::from_bytes(p));
418 418 }
419 419 let buf =
420 420 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
421 421 roots.push(buf);
422 422 }
423 423 PatternSyntax::Path | PatternSyntax::RelPath => {
424 424 let pat = HgPath::new(if pattern == b"." {
425 425 &[] as &[u8]
426 426 } else {
427 427 pattern
428 428 });
429 429 roots.push(pat.to_owned());
430 430 }
431 431 PatternSyntax::RootFiles => {
432 432 let pat = if pattern == b"." {
433 433 &[] as &[u8]
434 434 } else {
435 435 pattern
436 436 };
437 437 dirs.push(HgPathBuf::from_bytes(pat));
438 438 }
439 439 _ => {
440 440 roots.push(HgPathBuf::new());
441 441 }
442 442 }
443 443 }
444 444 (roots, dirs)
445 445 }
446 446
447 447 /// Paths extracted from patterns
448 448 #[derive(Debug, PartialEq)]
449 449 struct RootsDirsAndParents {
450 450 /// Directories to match recursively
451 451 pub roots: HashSet<HgPathBuf>,
452 452 /// Directories to match non-recursively
453 453 pub dirs: HashSet<HgPathBuf>,
454 454 /// Implicitly required directories to go to items in either roots or dirs
455 455 pub parents: HashSet<HgPathBuf>,
456 456 }
457 457
458 458 /// Extract roots, dirs and parents from patterns.
459 459 fn roots_dirs_and_parents(
460 460 ignore_patterns: &[IgnorePattern],
461 461 ) -> PatternResult<RootsDirsAndParents> {
462 462 let (roots, dirs) = roots_and_dirs(ignore_patterns);
463 463
464 464 let mut parents = HashSet::new();
465 465
466 466 parents.extend(
467 467 DirsMultiset::from_manifest(&dirs)
468 468 .map_err(|e| match e {
469 469 DirstateMapError::InvalidPath(e) => e,
470 470 _ => unreachable!(),
471 471 })?
472 472 .iter()
473 .map(|k| k.to_owned()),
473 .map(ToOwned::to_owned),
474 474 );
475 475 parents.extend(
476 476 DirsMultiset::from_manifest(&roots)
477 477 .map_err(|e| match e {
478 478 DirstateMapError::InvalidPath(e) => e,
479 479 _ => unreachable!(),
480 480 })?
481 481 .iter()
482 .map(|k| k.to_owned()),
482 .map(ToOwned::to_owned),
483 483 );
484 484
485 485 Ok(RootsDirsAndParents {
486 486 roots: HashSet::from_iter(roots),
487 487 dirs: HashSet::from_iter(dirs),
488 488 parents,
489 489 })
490 490 }
491 491
492 492 /// Returns a function that checks whether a given file (in the general sense)
493 493 /// should be matched.
494 494 fn build_match<'a, 'b>(
495 495 ignore_patterns: &'a [IgnorePattern],
496 496 root_dir: impl AsRef<Path>,
497 497 ) -> PatternResult<(
498 498 Vec<u8>,
499 499 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
500 500 Vec<PatternFileWarning>,
501 501 )> {
502 502 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
503 503 // For debugging and printing
504 504 let mut patterns = vec![];
505 505 let mut all_warnings = vec![];
506 506
507 507 let (subincludes, ignore_patterns) =
508 508 filter_subincludes(ignore_patterns, root_dir)?;
509 509
510 510 if !subincludes.is_empty() {
511 511 // Build prefix-based matcher functions for subincludes
512 512 let mut submatchers = FastHashMap::default();
513 513 let mut prefixes = vec![];
514 514
515 515 for SubInclude { prefix, root, path } in subincludes.into_iter() {
516 516 let (match_fn, warnings) =
517 517 get_ignore_function(vec![path.to_path_buf()], root)?;
518 518 all_warnings.extend(warnings);
519 519 prefixes.push(prefix.to_owned());
520 520 submatchers.insert(prefix.to_owned(), match_fn);
521 521 }
522 522
523 523 let match_subinclude = move |filename: &HgPath| {
524 524 for prefix in prefixes.iter() {
525 525 if let Some(rel) = filename.relative_to(prefix) {
526 if (submatchers.get(prefix).unwrap())(rel) {
526 if (submatchers[prefix])(rel) {
527 527 return true;
528 528 }
529 529 }
530 530 }
531 531 false
532 532 };
533 533
534 534 match_funcs.push(Box::new(match_subinclude));
535 535 }
536 536
537 537 if !ignore_patterns.is_empty() {
538 538 // Either do dumb matching if all patterns are rootfiles, or match
539 539 // with a regex.
540 540 if ignore_patterns
541 541 .iter()
542 542 .all(|k| k.syntax == PatternSyntax::RootFiles)
543 543 {
544 544 let dirs: HashSet<_> = ignore_patterns
545 545 .iter()
546 546 .map(|k| k.pattern.to_owned())
547 547 .collect();
548 548 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
549 549
550 550 let match_func = move |path: &HgPath| -> bool {
551 551 let path = path.as_bytes();
552 552 let i = path.iter().rfind(|a| **a == b'/');
553 553 let dir = if let Some(i) = i {
554 554 &path[..*i as usize]
555 555 } else {
556 556 b"."
557 557 };
558 558 dirs.contains(dir.deref())
559 559 };
560 560 match_funcs.push(Box::new(match_func));
561 561
562 562 patterns.extend(b"rootfilesin: ");
563 563 dirs_vec.sort();
564 564 patterns.extend(dirs_vec.escaped_bytes());
565 565 } else {
566 566 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
567 567 patterns = new_re;
568 568 match_funcs.push(match_func)
569 569 }
570 570 }
571 571
572 572 Ok(if match_funcs.len() == 1 {
573 573 (patterns, match_funcs.remove(0), all_warnings)
574 574 } else {
575 575 (
576 576 patterns,
577 577 Box::new(move |f: &HgPath| -> bool {
578 578 match_funcs.iter().any(|match_func| match_func(f))
579 579 }),
580 580 all_warnings,
581 581 )
582 582 })
583 583 }
584 584
585 585 /// Parses all "ignore" files with their recursive includes and returns a
586 586 /// function that checks whether a given file (in the general sense) should be
587 587 /// ignored.
588 588 pub fn get_ignore_function<'a>(
589 589 all_pattern_files: Vec<PathBuf>,
590 590 root_dir: impl AsRef<Path>,
591 591 ) -> PatternResult<(
592 592 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
593 593 Vec<PatternFileWarning>,
594 594 )> {
595 595 let mut all_patterns = vec![];
596 596 let mut all_warnings = vec![];
597 597
598 598 for pattern_file in all_pattern_files.into_iter() {
599 599 let (patterns, warnings) =
600 600 get_patterns_from_file(pattern_file, &root_dir)?;
601 601
602 602 all_patterns.extend(patterns.to_owned());
603 603 all_warnings.extend(warnings);
604 604 }
605 605 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
606 606 all_warnings.extend(warnings);
607 607 Ok((
608 608 Box::new(move |path: &HgPath| matcher.matches(path)),
609 609 all_warnings,
610 610 ))
611 611 }
612 612
613 613 impl<'a> IncludeMatcher<'a> {
614 614 pub fn new(
615 615 ignore_patterns: Vec<IgnorePattern>,
616 616 root_dir: impl AsRef<Path>,
617 617 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
618 618 let (patterns, match_fn, warnings) =
619 619 build_match(&ignore_patterns, root_dir)?;
620 620 let RootsDirsAndParents {
621 621 roots,
622 622 dirs,
623 623 parents,
624 624 } = roots_dirs_and_parents(&ignore_patterns)?;
625 625
626 626 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
627 627 PatternSyntax::Path | PatternSyntax::RelPath => true,
628 628 _ => false,
629 629 });
630 630
631 631 Ok((
632 632 Self {
633 633 patterns,
634 634 match_fn,
635 635 prefix,
636 636 roots,
637 637 dirs,
638 638 parents,
639 639 },
640 640 warnings,
641 641 ))
642 642 }
643 643
644 644 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
645 645 // TODO cache
646 646 let thing = self
647 647 .dirs
648 648 .iter()
649 649 .chain(self.roots.iter())
650 650 .chain(self.parents.iter());
651 651 DirsChildrenMultiset::new(thing, Some(&self.parents))
652 652 }
653 653 }
654 654
655 655 impl<'a> Display for IncludeMatcher<'a> {
656 656 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
657 657 // XXX What about exact matches?
658 658 // I'm not sure it's worth it to clone the HashSet and keep it
659 659 // around just in case someone wants to display the matcher, plus
660 660 // it's going to be unreadable after a few entries, but we need to
661 661 // inform in this display that exact matches are being used and are
662 662 // (on purpose) missing from the `includes`.
663 663 write!(
664 664 f,
665 665 "IncludeMatcher(includes='{}')",
666 666 String::from_utf8_lossy(&self.patterns.escaped_bytes())
667 667 )
668 668 }
669 669 }
670 670
671 671 #[cfg(test)]
672 672 mod tests {
673 673 use super::*;
674 674 use pretty_assertions::assert_eq;
675 675 use std::path::Path;
676 676
677 677 #[test]
678 678 fn test_roots_and_dirs() {
679 679 let pats = vec![
680 680 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
681 681 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
682 682 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
683 683 ];
684 684 let (roots, dirs) = roots_and_dirs(&pats);
685 685
686 686 assert_eq!(
687 687 roots,
688 688 vec!(
689 689 HgPathBuf::from_bytes(b"g/h"),
690 690 HgPathBuf::from_bytes(b"g/h"),
691 691 HgPathBuf::new()
692 692 ),
693 693 );
694 694 assert_eq!(dirs, vec!());
695 695 }
696 696
697 697 #[test]
698 698 fn test_roots_dirs_and_parents() {
699 699 let pats = vec![
700 700 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
701 701 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
702 702 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
703 703 ];
704 704
705 705 let mut roots = HashSet::new();
706 706 roots.insert(HgPathBuf::from_bytes(b"g/h"));
707 707 roots.insert(HgPathBuf::new());
708 708
709 709 let dirs = HashSet::new();
710 710
711 711 let mut parents = HashSet::new();
712 712 parents.insert(HgPathBuf::new());
713 713 parents.insert(HgPathBuf::from_bytes(b"g"));
714 714
715 715 assert_eq!(
716 716 roots_dirs_and_parents(&pats).unwrap(),
717 717 RootsDirsAndParents {
718 718 roots,
719 719 dirs,
720 720 parents
721 721 }
722 722 );
723 723 }
724 724
725 725 #[test]
726 726 fn test_filematcher_visit_children_set() {
727 727 // Visitchildrenset
728 728 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
729 729 let matcher = FileMatcher::new(&files).unwrap();
730 730
731 731 let mut set = HashSet::new();
732 732 set.insert(HgPath::new(b"dir"));
733 733 assert_eq!(
734 734 matcher.visit_children_set(HgPath::new(b"")),
735 735 VisitChildrenSet::Set(set)
736 736 );
737 737
738 738 let mut set = HashSet::new();
739 739 set.insert(HgPath::new(b"subdir"));
740 740 assert_eq!(
741 741 matcher.visit_children_set(HgPath::new(b"dir")),
742 742 VisitChildrenSet::Set(set)
743 743 );
744 744
745 745 let mut set = HashSet::new();
746 746 set.insert(HgPath::new(b"foo.txt"));
747 747 assert_eq!(
748 748 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
749 749 VisitChildrenSet::Set(set)
750 750 );
751 751
752 752 assert_eq!(
753 753 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
754 754 VisitChildrenSet::Empty
755 755 );
756 756 assert_eq!(
757 757 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
758 758 VisitChildrenSet::Empty
759 759 );
760 760 assert_eq!(
761 761 matcher.visit_children_set(HgPath::new(b"folder")),
762 762 VisitChildrenSet::Empty
763 763 );
764 764 }
765 765
766 766 #[test]
767 767 fn test_filematcher_visit_children_set_files_and_dirs() {
768 768 let files = vec![
769 769 HgPath::new(b"rootfile.txt"),
770 770 HgPath::new(b"a/file1.txt"),
771 771 HgPath::new(b"a/b/file2.txt"),
772 772 // No file in a/b/c
773 773 HgPath::new(b"a/b/c/d/file4.txt"),
774 774 ];
775 775 let matcher = FileMatcher::new(&files).unwrap();
776 776
777 777 let mut set = HashSet::new();
778 778 set.insert(HgPath::new(b"a"));
779 779 set.insert(HgPath::new(b"rootfile.txt"));
780 780 assert_eq!(
781 781 matcher.visit_children_set(HgPath::new(b"")),
782 782 VisitChildrenSet::Set(set)
783 783 );
784 784
785 785 let mut set = HashSet::new();
786 786 set.insert(HgPath::new(b"b"));
787 787 set.insert(HgPath::new(b"file1.txt"));
788 788 assert_eq!(
789 789 matcher.visit_children_set(HgPath::new(b"a")),
790 790 VisitChildrenSet::Set(set)
791 791 );
792 792
793 793 let mut set = HashSet::new();
794 794 set.insert(HgPath::new(b"c"));
795 795 set.insert(HgPath::new(b"file2.txt"));
796 796 assert_eq!(
797 797 matcher.visit_children_set(HgPath::new(b"a/b")),
798 798 VisitChildrenSet::Set(set)
799 799 );
800 800
801 801 let mut set = HashSet::new();
802 802 set.insert(HgPath::new(b"d"));
803 803 assert_eq!(
804 804 matcher.visit_children_set(HgPath::new(b"a/b/c")),
805 805 VisitChildrenSet::Set(set)
806 806 );
807 807 let mut set = HashSet::new();
808 808 set.insert(HgPath::new(b"file4.txt"));
809 809 assert_eq!(
810 810 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
811 811 VisitChildrenSet::Set(set)
812 812 );
813 813
814 814 assert_eq!(
815 815 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
816 816 VisitChildrenSet::Empty
817 817 );
818 818 assert_eq!(
819 819 matcher.visit_children_set(HgPath::new(b"folder")),
820 820 VisitChildrenSet::Empty
821 821 );
822 822 }
823 823
824 824 #[test]
825 825 fn test_includematcher() {
826 826 // VisitchildrensetPrefix
827 827 let (matcher, _) = IncludeMatcher::new(
828 828 vec![IgnorePattern::new(
829 829 PatternSyntax::RelPath,
830 830 b"dir/subdir",
831 831 Path::new(""),
832 832 )],
833 833 "",
834 834 )
835 835 .unwrap();
836 836
837 837 let mut set = HashSet::new();
838 838 set.insert(HgPath::new(b"dir"));
839 839 assert_eq!(
840 840 matcher.visit_children_set(HgPath::new(b"")),
841 841 VisitChildrenSet::Set(set)
842 842 );
843 843
844 844 let mut set = HashSet::new();
845 845 set.insert(HgPath::new(b"subdir"));
846 846 assert_eq!(
847 847 matcher.visit_children_set(HgPath::new(b"dir")),
848 848 VisitChildrenSet::Set(set)
849 849 );
850 850 assert_eq!(
851 851 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
852 852 VisitChildrenSet::Recursive
853 853 );
854 854 // OPT: This should probably be 'all' if its parent is?
855 855 assert_eq!(
856 856 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
857 857 VisitChildrenSet::This
858 858 );
859 859 assert_eq!(
860 860 matcher.visit_children_set(HgPath::new(b"folder")),
861 861 VisitChildrenSet::Empty
862 862 );
863 863
864 864 // VisitchildrensetRootfilesin
865 865 let (matcher, _) = IncludeMatcher::new(
866 866 vec![IgnorePattern::new(
867 867 PatternSyntax::RootFiles,
868 868 b"dir/subdir",
869 869 Path::new(""),
870 870 )],
871 871 "",
872 872 )
873 873 .unwrap();
874 874
875 875 let mut set = HashSet::new();
876 876 set.insert(HgPath::new(b"dir"));
877 877 assert_eq!(
878 878 matcher.visit_children_set(HgPath::new(b"")),
879 879 VisitChildrenSet::Set(set)
880 880 );
881 881
882 882 let mut set = HashSet::new();
883 883 set.insert(HgPath::new(b"subdir"));
884 884 assert_eq!(
885 885 matcher.visit_children_set(HgPath::new(b"dir")),
886 886 VisitChildrenSet::Set(set)
887 887 );
888 888
889 889 assert_eq!(
890 890 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
891 891 VisitChildrenSet::This
892 892 );
893 893 assert_eq!(
894 894 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
895 895 VisitChildrenSet::Empty
896 896 );
897 897 assert_eq!(
898 898 matcher.visit_children_set(HgPath::new(b"folder")),
899 899 VisitChildrenSet::Empty
900 900 );
901 901
902 902 // VisitchildrensetGlob
903 903 let (matcher, _) = IncludeMatcher::new(
904 904 vec![IgnorePattern::new(
905 905 PatternSyntax::Glob,
906 906 b"dir/z*",
907 907 Path::new(""),
908 908 )],
909 909 "",
910 910 )
911 911 .unwrap();
912 912
913 913 let mut set = HashSet::new();
914 914 set.insert(HgPath::new(b"dir"));
915 915 assert_eq!(
916 916 matcher.visit_children_set(HgPath::new(b"")),
917 917 VisitChildrenSet::Set(set)
918 918 );
919 919 assert_eq!(
920 920 matcher.visit_children_set(HgPath::new(b"folder")),
921 921 VisitChildrenSet::Empty
922 922 );
923 923 assert_eq!(
924 924 matcher.visit_children_set(HgPath::new(b"dir")),
925 925 VisitChildrenSet::This
926 926 );
927 927 // OPT: these should probably be set().
928 928 assert_eq!(
929 929 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
930 930 VisitChildrenSet::This
931 931 );
932 932 assert_eq!(
933 933 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
934 934 VisitChildrenSet::This
935 935 );
936 936 }
937 937 }
@@ -1,56 +1,61 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Mercurial concepts for handling revision history
7 7
8 8 pub mod node;
9 9 pub mod nodemap;
10 10 pub use node::{Node, NodeError, NodePrefix, NodePrefixRef};
11 11
12 12 /// Mercurial revision numbers
13 13 ///
14 14 /// As noted in revlog.c, revision numbers are actually encoded in
15 15 /// 4 bytes, and are liberally converted to ints, whence the i32
16 16 pub type Revision = i32;
17 17
18 18 /// Marker expressing the absence of a parent
19 19 ///
20 20 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
21 21 /// to be smaller than all existing revisions.
22 22 pub const NULL_REVISION: Revision = -1;
23 23
24 24 /// Same as `mercurial.node.wdirrev`
25 25 ///
26 26 /// This is also equal to `i32::max_value()`, but it's better to spell
27 27 /// it out explicitely, same as in `mercurial.node`
28 #[allow(clippy::unreadable_literal)]
28 29 pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff;
29 30
30 31 /// The simplest expression of what we need of Mercurial DAGs.
31 32 pub trait Graph {
32 33 /// Return the two parents of the given `Revision`.
33 34 ///
34 35 /// Each of the parents can be independently `NULL_REVISION`
35 36 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
36 37 }
37 38
38 39 #[derive(Clone, Debug, PartialEq)]
39 40 pub enum GraphError {
40 41 ParentOutOfRange(Revision),
41 42 WorkingDirectoryUnsupported,
42 43 }
43 44
44 45 /// The Mercurial Revlog Index
45 46 ///
46 47 /// This is currently limited to the minimal interface that is needed for
47 48 /// the [`nodemap`](nodemap/index.html) module
48 49 pub trait RevlogIndex {
49 50 /// Total number of Revisions referenced in this index
50 51 fn len(&self) -> usize;
51 52
53 fn is_empty(&self) -> bool {
54 self.len() == 0
55 }
56
52 57 /// Return a reference to the Node or `None` if rev is out of bounds
53 58 ///
54 59 /// `NULL_REVISION` is not considered to be out of bounds.
55 60 fn node(&self, rev: Revision) -> Option<&Node>;
56 61 }
@@ -1,429 +1,433 b''
1 1 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 //
3 3 // This software may be used and distributed according to the terms of the
4 4 // GNU General Public License version 2 or any later version.
5 5
6 6 //! Definitions and utilities for Revision nodes
7 7 //!
8 8 //! In Mercurial code base, it is customary to call "a node" the binary SHA
9 9 //! of a revision.
10 10
11 11 use hex::{self, FromHex, FromHexError};
12 12
13 13 /// The length in bytes of a `Node`
14 14 ///
15 15 /// This constant is meant to ease refactors of this module, and
16 16 /// are private so that calling code does not expect all nodes have
17 17 /// the same size, should we support several formats concurrently in
18 18 /// the future.
19 19 const NODE_BYTES_LENGTH: usize = 20;
20 20
21 21 /// The length in bytes of a `Node`
22 22 ///
23 23 /// see also `NODES_BYTES_LENGTH` about it being private.
24 24 const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
25 25
26 26 /// Private alias for readability and to ease future change
27 27 type NodeData = [u8; NODE_BYTES_LENGTH];
28 28
29 29 /// Binary revision SHA
30 30 ///
31 31 /// ## Future changes of hash size
32 32 ///
33 33 /// To accomodate future changes of hash size, Rust callers
34 34 /// should use the conversion methods at the boundaries (FFI, actual
35 35 /// computation of hashes and I/O) only, and only if required.
36 36 ///
37 37 /// All other callers outside of unit tests should just handle `Node` values
38 38 /// and never make any assumption on the actual length, using [`nybbles_len`]
39 39 /// if they need a loop boundary.
40 40 ///
41 41 /// All methods that create a `Node` either take a type that enforces
42 42 /// the size or fail immediately at runtime with [`ExactLengthRequired`].
43 43 ///
44 44 /// [`nybbles_len`]: #method.nybbles_len
45 45 /// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired
46 46 #[derive(Clone, Debug, PartialEq)]
47 47 #[repr(transparent)]
48 48 pub struct Node {
49 49 data: NodeData,
50 50 }
51 51
52 52 /// The node value for NULL_REVISION
53 53 pub const NULL_NODE: Node = Node {
54 54 data: [0; NODE_BYTES_LENGTH],
55 55 };
56 56
57 57 impl From<NodeData> for Node {
58 58 fn from(data: NodeData) -> Node {
59 59 Node { data }
60 60 }
61 61 }
62 62
63 63 #[derive(Debug, PartialEq)]
64 64 pub enum NodeError {
65 65 ExactLengthRequired(usize, String),
66 66 PrefixTooLong(String),
67 67 HexError(FromHexError, String),
68 68 }
69 69
70 70 /// Low level utility function, also for prefixes
71 71 fn get_nybble(s: &[u8], i: usize) -> u8 {
72 72 if i % 2 == 0 {
73 73 s[i / 2] >> 4
74 74 } else {
75 75 s[i / 2] & 0x0f
76 76 }
77 77 }
78 78
79 79 impl Node {
80 80 /// Retrieve the `i`th half-byte of the binary data.
81 81 ///
82 82 /// This is also the `i`th hexadecimal digit in numeric form,
83 83 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
84 84 pub fn get_nybble(&self, i: usize) -> u8 {
85 85 get_nybble(&self.data, i)
86 86 }
87 87
88 88 /// Length of the data, in nybbles
89 89 pub fn nybbles_len(&self) -> usize {
90 90 // public exposure as an instance method only, so that we can
91 91 // easily support several sizes of hashes if needed in the future.
92 92 NODE_NYBBLES_LENGTH
93 93 }
94 94
95 95 /// Convert from hexadecimal string representation
96 96 ///
97 97 /// Exact length is required.
98 98 ///
99 99 /// To be used in FFI and I/O only, in order to facilitate future
100 100 /// changes of hash format.
101 101 pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
102 102 Ok(NodeData::from_hex(hex)
103 103 .map_err(|e| NodeError::from((e, hex)))?
104 104 .into())
105 105 }
106 106
107 107 /// Convert to hexadecimal string representation
108 108 ///
109 109 /// To be used in FFI and I/O only, in order to facilitate future
110 110 /// changes of hash format.
111 111 pub fn encode_hex(&self) -> String {
112 112 hex::encode(self.data)
113 113 }
114 114
115 115 /// Provide access to binary data
116 116 ///
117 117 /// This is needed by FFI layers, for instance to return expected
118 118 /// binary values to Python.
119 119 pub fn as_bytes(&self) -> &[u8] {
120 120 &self.data
121 121 }
122 122 }
123 123
124 124 impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
125 125 fn from(err_offender: (FromHexError, T)) -> Self {
126 126 let (err, offender) = err_offender;
127 127 match err {
128 128 FromHexError::InvalidStringLength => {
129 129 NodeError::ExactLengthRequired(
130 130 NODE_NYBBLES_LENGTH,
131 131 offender.as_ref().to_owned(),
132 132 )
133 133 }
134 134 _ => NodeError::HexError(err, offender.as_ref().to_owned()),
135 135 }
136 136 }
137 137 }
138 138
139 139 /// The beginning of a binary revision SHA.
140 140 ///
141 141 /// Since it can potentially come from an hexadecimal representation with
142 142 /// odd length, it needs to carry around whether the last 4 bits are relevant
143 143 /// or not.
144 144 #[derive(Debug, PartialEq)]
145 145 pub struct NodePrefix {
146 146 buf: Vec<u8>,
147 147 is_odd: bool,
148 148 }
149 149
150 150 impl NodePrefix {
151 151 /// Convert from hexadecimal string representation
152 152 ///
153 153 /// Similarly to `hex::decode`, can be used with Unicode string types
154 154 /// (`String`, `&str`) as well as bytes.
155 155 ///
156 156 /// To be used in FFI and I/O only, in order to facilitate future
157 157 /// changes of hash format.
158 158 pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Self, NodeError> {
159 159 let hex = hex.as_ref();
160 160 let len = hex.len();
161 161 if len > NODE_NYBBLES_LENGTH {
162 162 return Err(NodeError::PrefixTooLong(
163 163 String::from_utf8_lossy(hex).to_owned().to_string(),
164 164 ));
165 165 }
166 166
167 167 let is_odd = len % 2 == 1;
168 168 let even_part = if is_odd { &hex[..len - 1] } else { hex };
169 169 let mut buf: Vec<u8> = Vec::from_hex(&even_part)
170 170 .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
171 171
172 172 if is_odd {
173 173 let latest_char = char::from(hex[len - 1]);
174 174 let latest_nybble = latest_char.to_digit(16).ok_or_else(|| {
175 175 (
176 176 FromHexError::InvalidHexCharacter {
177 177 c: latest_char,
178 178 index: len - 1,
179 179 },
180 180 String::from_utf8_lossy(hex),
181 181 )
182 182 })? as u8;
183 183 buf.push(latest_nybble << 4);
184 184 }
185 185 Ok(NodePrefix { buf, is_odd })
186 186 }
187 187
188 188 pub fn borrow(&self) -> NodePrefixRef {
189 189 NodePrefixRef {
190 190 buf: &self.buf,
191 191 is_odd: self.is_odd,
192 192 }
193 193 }
194 194 }
195 195
196 196 #[derive(Clone, Debug, PartialEq)]
197 197 pub struct NodePrefixRef<'a> {
198 198 buf: &'a [u8],
199 199 is_odd: bool,
200 200 }
201 201
202 202 impl<'a> NodePrefixRef<'a> {
203 203 pub fn len(&self) -> usize {
204 204 if self.is_odd {
205 205 self.buf.len() * 2 - 1
206 206 } else {
207 207 self.buf.len() * 2
208 208 }
209 209 }
210 210
211 pub fn is_empty(&self) -> bool {
212 self.len() == 0
213 }
214
211 215 pub fn is_prefix_of(&self, node: &Node) -> bool {
212 216 if self.is_odd {
213 217 let buf = self.buf;
214 218 let last_pos = buf.len() - 1;
215 219 node.data.starts_with(buf.split_at(last_pos).0)
216 220 && node.data[last_pos] >> 4 == buf[last_pos] >> 4
217 221 } else {
218 222 node.data.starts_with(self.buf)
219 223 }
220 224 }
221 225
222 226 /// Retrieve the `i`th half-byte from the prefix.
223 227 ///
224 228 /// This is also the `i`th hexadecimal digit in numeric form,
225 229 /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
226 230 pub fn get_nybble(&self, i: usize) -> u8 {
227 231 assert!(i < self.len());
228 232 get_nybble(self.buf, i)
229 233 }
230 234
231 235 /// Return the index first nybble that's different from `node`
232 236 ///
233 237 /// If the return value is `None` that means that `self` is
234 238 /// a prefix of `node`, but the current method is a bit slower
235 239 /// than `is_prefix_of`.
236 240 ///
237 241 /// Returned index is as in `get_nybble`, i.e., starting at 0.
238 242 pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
239 243 let buf = self.buf;
240 244 let until = if self.is_odd {
241 245 buf.len() - 1
242 246 } else {
243 247 buf.len()
244 248 };
245 for i in 0..until {
246 if buf[i] != node.data[i] {
247 if buf[i] & 0xf0 == node.data[i] & 0xf0 {
248 return Some(2 * i + 1);
249 for (i, item) in buf.iter().enumerate().take(until) {
250 if *item != node.data[i] {
251 return if *item & 0xf0 == node.data[i] & 0xf0 {
252 Some(2 * i + 1)
249 253 } else {
250 return Some(2 * i);
251 }
254 Some(2 * i)
255 };
252 256 }
253 257 }
254 258 if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
255 259 Some(until * 2)
256 260 } else {
257 261 None
258 262 }
259 263 }
260 264 }
261 265
262 266 /// A shortcut for full `Node` references
263 267 impl<'a> From<&'a Node> for NodePrefixRef<'a> {
264 268 fn from(node: &'a Node) -> Self {
265 269 NodePrefixRef {
266 270 buf: &node.data,
267 271 is_odd: false,
268 272 }
269 273 }
270 274 }
271 275
272 276 #[cfg(test)]
273 277 mod tests {
274 278 use super::*;
275 279
276 280 fn sample_node() -> Node {
277 281 let mut data = [0; NODE_BYTES_LENGTH];
278 282 data.copy_from_slice(&[
279 283 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
280 284 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
281 285 ]);
282 286 data.into()
283 287 }
284 288
285 289 /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
286 290 ///
287 291 /// The padding is made with zeros
288 292 pub fn hex_pad_right(hex: &str) -> String {
289 293 let mut res = hex.to_string();
290 294 while res.len() < NODE_NYBBLES_LENGTH {
291 295 res.push('0');
292 296 }
293 297 res
294 298 }
295 299
296 300 fn sample_node_hex() -> String {
297 301 hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
298 302 }
299 303
300 304 #[test]
301 305 fn test_node_from_hex() {
302 306 assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
303 307
304 308 let mut short = hex_pad_right("0123");
305 309 short.pop();
306 310 short.pop();
307 311 assert_eq!(
308 312 Node::from_hex(&short),
309 313 Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
310 314 );
311 315
312 316 let not_hex = hex_pad_right("012... oops");
313 317 assert_eq!(
314 318 Node::from_hex(&not_hex),
315 319 Err(NodeError::HexError(
316 320 FromHexError::InvalidHexCharacter { c: '.', index: 3 },
317 321 not_hex,
318 322 )),
319 323 );
320 324 }
321 325
322 326 #[test]
323 327 fn test_node_encode_hex() {
324 328 assert_eq!(sample_node().encode_hex(), sample_node_hex());
325 329 }
326 330
327 331 #[test]
328 332 fn test_prefix_from_hex() -> Result<(), NodeError> {
329 333 assert_eq!(
330 334 NodePrefix::from_hex("0e1")?,
331 335 NodePrefix {
332 336 buf: vec![14, 16],
333 337 is_odd: true
334 338 }
335 339 );
336 340 assert_eq!(
337 341 NodePrefix::from_hex("0e1a")?,
338 342 NodePrefix {
339 343 buf: vec![14, 26],
340 344 is_odd: false
341 345 }
342 346 );
343 347
344 348 // checking limit case
345 349 let node_as_vec = sample_node().data.iter().cloned().collect();
346 350 assert_eq!(
347 351 NodePrefix::from_hex(sample_node_hex())?,
348 352 NodePrefix {
349 353 buf: node_as_vec,
350 354 is_odd: false
351 355 }
352 356 );
353 357
354 358 Ok(())
355 359 }
356 360
357 361 #[test]
358 362 fn test_prefix_from_hex_errors() {
359 363 assert_eq!(
360 364 NodePrefix::from_hex("testgr"),
361 365 Err(NodeError::HexError(
362 366 FromHexError::InvalidHexCharacter { c: 't', index: 0 },
363 367 "testgr".to_string()
364 368 ))
365 369 );
366 370 let mut long = NULL_NODE.encode_hex();
367 371 long.push('c');
368 372 match NodePrefix::from_hex(&long)
369 373 .expect_err("should be refused as too long")
370 374 {
371 375 NodeError::PrefixTooLong(s) => assert_eq!(s, long),
372 376 err => panic!(format!("Should have been TooLong, got {:?}", err)),
373 377 }
374 378 }
375 379
376 380 #[test]
377 381 fn test_is_prefix_of() -> Result<(), NodeError> {
378 382 let mut node_data = [0; NODE_BYTES_LENGTH];
379 383 node_data[0] = 0x12;
380 384 node_data[1] = 0xca;
381 385 let node = Node::from(node_data);
382 386 assert!(NodePrefix::from_hex("12")?.borrow().is_prefix_of(&node));
383 387 assert!(!NodePrefix::from_hex("1a")?.borrow().is_prefix_of(&node));
384 388 assert!(NodePrefix::from_hex("12c")?.borrow().is_prefix_of(&node));
385 389 assert!(!NodePrefix::from_hex("12d")?.borrow().is_prefix_of(&node));
386 390 Ok(())
387 391 }
388 392
389 393 #[test]
390 394 fn test_get_nybble() -> Result<(), NodeError> {
391 395 let prefix = NodePrefix::from_hex("dead6789cafe")?;
392 396 assert_eq!(prefix.borrow().get_nybble(0), 13);
393 397 assert_eq!(prefix.borrow().get_nybble(7), 9);
394 398 Ok(())
395 399 }
396 400
397 401 #[test]
398 402 fn test_first_different_nybble_even_prefix() {
399 403 let prefix = NodePrefix::from_hex("12ca").unwrap();
400 404 let prefref = prefix.borrow();
401 405 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
402 406 assert_eq!(prefref.first_different_nybble(&node), Some(0));
403 407 node.data[0] = 0x13;
404 408 assert_eq!(prefref.first_different_nybble(&node), Some(1));
405 409 node.data[0] = 0x12;
406 410 assert_eq!(prefref.first_different_nybble(&node), Some(2));
407 411 node.data[1] = 0xca;
408 412 // now it is a prefix
409 413 assert_eq!(prefref.first_different_nybble(&node), None);
410 414 }
411 415
412 416 #[test]
413 417 fn test_first_different_nybble_odd_prefix() {
414 418 let prefix = NodePrefix::from_hex("12c").unwrap();
415 419 let prefref = prefix.borrow();
416 420 let mut node = Node::from([0; NODE_BYTES_LENGTH]);
417 421 assert_eq!(prefref.first_different_nybble(&node), Some(0));
418 422 node.data[0] = 0x13;
419 423 assert_eq!(prefref.first_different_nybble(&node), Some(1));
420 424 node.data[0] = 0x12;
421 425 assert_eq!(prefref.first_different_nybble(&node), Some(2));
422 426 node.data[1] = 0xca;
423 427 // now it is a prefix
424 428 assert_eq!(prefref.first_different_nybble(&node), None);
425 429 }
426 430 }
427 431
428 432 #[cfg(test)]
429 433 pub use tests::hex_pad_right;
@@ -1,1122 +1,1118 b''
1 1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 2 // and Mercurial contributors
3 3 //
4 4 // This software may be used and distributed according to the terms of the
5 5 // GNU General Public License version 2 or any later version.
6 6 //! Indexing facilities for fast retrieval of `Revision` from `Node`
7 7 //!
8 8 //! This provides a variation on the 16-ary radix tree that is
9 9 //! provided as "nodetree" in revlog.c, ready for append-only persistence
10 10 //! on disk.
11 11 //!
12 12 //! Following existing implicit conventions, the "nodemap" terminology
13 13 //! is used in a more abstract context.
14 14
15 15 use super::{
16 16 node::NULL_NODE, Node, NodeError, NodePrefix, NodePrefixRef, Revision,
17 17 RevlogIndex, NULL_REVISION,
18 18 };
19 19
20 20 use std::cmp::max;
21 21 use std::fmt;
22 22 use std::mem;
23 23 use std::ops::Deref;
24 24 use std::ops::Index;
25 25 use std::slice;
26 26
27 27 #[derive(Debug, PartialEq)]
28 28 pub enum NodeMapError {
29 29 MultipleResults,
30 30 InvalidNodePrefix(NodeError),
31 31 /// A `Revision` stored in the nodemap could not be found in the index
32 32 RevisionNotInIndex(Revision),
33 33 }
34 34
35 35 impl From<NodeError> for NodeMapError {
36 36 fn from(err: NodeError) -> Self {
37 37 NodeMapError::InvalidNodePrefix(err)
38 38 }
39 39 }
40 40
41 41 /// Mapping system from Mercurial nodes to revision numbers.
42 42 ///
43 43 /// ## `RevlogIndex` and `NodeMap`
44 44 ///
45 45 /// One way to think about their relationship is that
46 46 /// the `NodeMap` is a prefix-oriented reverse index of the `Node` information
47 47 /// carried by a [`RevlogIndex`].
48 48 ///
49 49 /// Many of the methods in this trait take a `RevlogIndex` argument
50 50 /// which is used for validation of their results. This index must naturally
51 51 /// be the one the `NodeMap` is about, and it must be consistent.
52 52 ///
53 53 /// Notably, the `NodeMap` must not store
54 54 /// information about more `Revision` values than there are in the index.
55 55 /// In these methods, an encountered `Revision` is not in the index, a
56 56 /// [`RevisionNotInIndex`] error is returned.
57 57 ///
58 58 /// In insert operations, the rule is thus that the `NodeMap` must always
59 59 /// be updated after the `RevlogIndex`
60 60 /// be updated first, and the `NodeMap` second.
61 61 ///
62 62 /// [`RevisionNotInIndex`]: enum.NodeMapError.html#variant.RevisionNotInIndex
63 63 /// [`RevlogIndex`]: ../trait.RevlogIndex.html
64 64 pub trait NodeMap {
65 65 /// Find the unique `Revision` having the given `Node`
66 66 ///
67 67 /// If no Revision matches the given `Node`, `Ok(None)` is returned.
68 68 fn find_node(
69 69 &self,
70 70 index: &impl RevlogIndex,
71 71 node: &Node,
72 72 ) -> Result<Option<Revision>, NodeMapError> {
73 73 self.find_bin(index, node.into())
74 74 }
75 75
76 76 /// Find the unique Revision whose `Node` starts with a given binary prefix
77 77 ///
78 78 /// If no Revision matches the given prefix, `Ok(None)` is returned.
79 79 ///
80 80 /// If several Revisions match the given prefix, a [`MultipleResults`]
81 81 /// error is returned.
82 82 fn find_bin<'a>(
83 83 &self,
84 84 idx: &impl RevlogIndex,
85 85 prefix: NodePrefixRef<'a>,
86 86 ) -> Result<Option<Revision>, NodeMapError>;
87 87
88 88 /// Find the unique Revision whose `Node` hexadecimal string representation
89 89 /// starts with a given prefix
90 90 ///
91 91 /// If no Revision matches the given prefix, `Ok(None)` is returned.
92 92 ///
93 93 /// If several Revisions match the given prefix, a [`MultipleResults`]
94 94 /// error is returned.
95 95 fn find_hex(
96 96 &self,
97 97 idx: &impl RevlogIndex,
98 98 prefix: &str,
99 99 ) -> Result<Option<Revision>, NodeMapError> {
100 100 self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
101 101 }
102 102
103 103 /// Give the size of the shortest node prefix that determines
104 104 /// the revision uniquely.
105 105 ///
106 106 /// From a binary node prefix, if it is matched in the node map, this
107 107 /// returns the number of hexadecimal digits that would had sufficed
108 108 /// to find the revision uniquely.
109 109 ///
110 110 /// Returns `None` if no `Revision` could be found for the prefix.
111 111 ///
112 112 /// If several Revisions match the given prefix, a [`MultipleResults`]
113 113 /// error is returned.
114 114 fn unique_prefix_len_bin<'a>(
115 115 &self,
116 116 idx: &impl RevlogIndex,
117 117 node_prefix: NodePrefixRef<'a>,
118 118 ) -> Result<Option<usize>, NodeMapError>;
119 119
120 120 /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
121 121 /// of the prefix as input.
122 122 fn unique_prefix_len_hex(
123 123 &self,
124 124 idx: &impl RevlogIndex,
125 125 prefix: &str,
126 126 ) -> Result<Option<usize>, NodeMapError> {
127 127 self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
128 128 }
129 129
130 130 /// Same as `unique_prefix_len_bin`, with a full `Node` as input
131 131 fn unique_prefix_len_node(
132 132 &self,
133 133 idx: &impl RevlogIndex,
134 134 node: &Node,
135 135 ) -> Result<Option<usize>, NodeMapError> {
136 136 self.unique_prefix_len_bin(idx, node.into())
137 137 }
138 138 }
139 139
140 140 pub trait MutableNodeMap: NodeMap {
141 141 fn insert<I: RevlogIndex>(
142 142 &mut self,
143 143 index: &I,
144 144 node: &Node,
145 145 rev: Revision,
146 146 ) -> Result<(), NodeMapError>;
147 147 }
148 148
149 149 /// Low level NodeTree [`Blocks`] elements
150 150 ///
151 151 /// These are exactly as for instance on persistent storage.
152 152 type RawElement = i32;
153 153
154 154 /// High level representation of values in NodeTree
155 155 /// [`Blocks`](struct.Block.html)
156 156 ///
157 157 /// This is the high level representation that most algorithms should
158 158 /// use.
159 159 #[derive(Clone, Debug, Eq, PartialEq)]
160 160 enum Element {
161 161 Rev(Revision),
162 162 Block(usize),
163 163 None,
164 164 }
165 165
166 166 impl From<RawElement> for Element {
167 167 /// Conversion from low level representation, after endianness conversion.
168 168 ///
169 169 /// See [`Block`](struct.Block.html) for explanation about the encoding.
170 170 fn from(raw: RawElement) -> Element {
171 171 if raw >= 0 {
172 172 Element::Block(raw as usize)
173 173 } else if raw == -1 {
174 174 Element::None
175 175 } else {
176 176 Element::Rev(-raw - 2)
177 177 }
178 178 }
179 179 }
180 180
181 181 impl From<Element> for RawElement {
182 182 fn from(element: Element) -> RawElement {
183 183 match element {
184 184 Element::None => 0,
185 185 Element::Block(i) => i as RawElement,
186 186 Element::Rev(rev) => -rev - 2,
187 187 }
188 188 }
189 189 }
190 190
191 191 /// A logical block of the `NodeTree`, packed with a fixed size.
192 192 ///
193 193 /// These are always used in container types implementing `Index<Block>`,
194 194 /// such as `&Block`
195 195 ///
196 196 /// As an array of integers, its ith element encodes that the
197 197 /// ith potential edge from the block, representing the ith hexadecimal digit
198 198 /// (nybble) `i` is either:
199 199 ///
200 200 /// - absent (value -1)
201 201 /// - another `Block` in the same indexable container (value ≥ 0)
202 202 /// - a `Revision` leaf (value ≤ -2)
203 203 ///
204 204 /// Endianness has to be fixed for consistency on shared storage across
205 205 /// different architectures.
206 206 ///
207 207 /// A key difference with the C `nodetree` is that we need to be
208 208 /// able to represent the [`Block`] at index 0, hence -1 is the empty marker
209 209 /// rather than 0 and the `Revision` range upper limit of -2 instead of -1.
210 210 ///
211 211 /// Another related difference is that `NULL_REVISION` (-1) is not
212 212 /// represented at all, because we want an immutable empty nodetree
213 213 /// to be valid.
214 214
215 215 #[derive(Copy, Clone)]
216 216 pub struct Block([u8; BLOCK_SIZE]);
217 217
218 218 /// Not derivable for arrays of length >32 until const generics are stable
219 219 impl PartialEq for Block {
220 220 fn eq(&self, other: &Self) -> bool {
221 &self.0[..] == &other.0[..]
221 self.0[..] == other.0[..]
222 222 }
223 223 }
224 224
225 225 pub const BLOCK_SIZE: usize = 64;
226 226
227 227 impl Block {
228 228 fn new() -> Self {
229 229 // -1 in 2's complement to create an absent node
230 230 let byte: u8 = 255;
231 231 Block([byte; BLOCK_SIZE])
232 232 }
233 233
234 234 fn get(&self, nybble: u8) -> Element {
235 235 let index = nybble as usize * mem::size_of::<RawElement>();
236 236 Element::from(RawElement::from_be_bytes([
237 237 self.0[index],
238 238 self.0[index + 1],
239 239 self.0[index + 2],
240 240 self.0[index + 3],
241 241 ]))
242 242 }
243 243
244 244 fn set(&mut self, nybble: u8, element: Element) {
245 245 let values = RawElement::to_be_bytes(element.into());
246 246 let index = nybble as usize * mem::size_of::<RawElement>();
247 247 self.0[index] = values[0];
248 248 self.0[index + 1] = values[1];
249 249 self.0[index + 2] = values[2];
250 250 self.0[index + 3] = values[3];
251 251 }
252 252 }
253 253
254 254 impl fmt::Debug for Block {
255 255 /// sparse representation for testing and debugging purposes
256 256 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
257 257 f.debug_map()
258 258 .entries((0..16).filter_map(|i| match self.get(i) {
259 259 Element::None => None,
260 260 element => Some((i, element)),
261 261 }))
262 262 .finish()
263 263 }
264 264 }
265 265
266 266 /// A mutable 16-radix tree with the root block logically at the end
267 267 ///
268 268 /// Because of the append only nature of our node trees, we need to
269 269 /// keep the original untouched and store new blocks separately.
270 270 ///
271 271 /// The mutable root `Block` is kept apart so that we don't have to rebump
272 272 /// it on each insertion.
273 273 pub struct NodeTree {
274 274 readonly: Box<dyn Deref<Target = [Block]> + Send>,
275 275 growable: Vec<Block>,
276 276 root: Block,
277 277 masked_inner_blocks: usize,
278 278 }
279 279
280 280 impl Index<usize> for NodeTree {
281 281 type Output = Block;
282 282
283 283 fn index(&self, i: usize) -> &Block {
284 284 let ro_len = self.readonly.len();
285 285 if i < ro_len {
286 286 &self.readonly[i]
287 287 } else if i == ro_len + self.growable.len() {
288 288 &self.root
289 289 } else {
290 290 &self.growable[i - ro_len]
291 291 }
292 292 }
293 293 }
294 294
295 295 /// Return `None` unless the `Node` for `rev` has given prefix in `index`.
296 296 fn has_prefix_or_none(
297 297 idx: &impl RevlogIndex,
298 298 prefix: NodePrefixRef,
299 299 rev: Revision,
300 300 ) -> Result<Option<Revision>, NodeMapError> {
301 301 idx.node(rev)
302 302 .ok_or_else(|| NodeMapError::RevisionNotInIndex(rev))
303 303 .map(|node| {
304 304 if prefix.is_prefix_of(node) {
305 305 Some(rev)
306 306 } else {
307 307 None
308 308 }
309 309 })
310 310 }
311 311
312 312 /// validate that the candidate's node starts indeed with given prefix,
313 313 /// and treat ambiguities related to `NULL_REVISION`.
314 314 ///
315 315 /// From the data in the NodeTree, one can only conclude that some
316 316 /// revision is the only one for a *subprefix* of the one being looked up.
317 317 fn validate_candidate(
318 318 idx: &impl RevlogIndex,
319 319 prefix: NodePrefixRef,
320 320 candidate: (Option<Revision>, usize),
321 321 ) -> Result<(Option<Revision>, usize), NodeMapError> {
322 322 let (rev, steps) = candidate;
323 323 if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
324 324 rev.map_or(Ok((None, steps)), |r| {
325 325 has_prefix_or_none(idx, prefix, r)
326 326 .map(|opt| (opt, max(steps, nz_nybble + 1)))
327 327 })
328 328 } else {
329 329 // the prefix is only made of zeros; NULL_REVISION always matches it
330 330 // and any other *valid* result is an ambiguity
331 331 match rev {
332 332 None => Ok((Some(NULL_REVISION), steps + 1)),
333 333 Some(r) => match has_prefix_or_none(idx, prefix, r)? {
334 334 None => Ok((Some(NULL_REVISION), steps + 1)),
335 335 _ => Err(NodeMapError::MultipleResults),
336 336 },
337 337 }
338 338 }
339 339 }
340 340
341 341 impl NodeTree {
342 342 /// Initiate a NodeTree from an immutable slice-like of `Block`
343 343 ///
344 344 /// We keep `readonly` and clone its root block if it isn't empty.
345 345 fn new(readonly: Box<dyn Deref<Target = [Block]> + Send>) -> Self {
346 let root = readonly
347 .last()
348 .map(|b| b.clone())
349 .unwrap_or_else(|| Block::new());
346 let root = readonly.last().cloned().unwrap_or_else(Block::new);
350 347 NodeTree {
351 readonly: readonly,
348 readonly,
352 349 growable: Vec::new(),
353 root: root,
350 root,
354 351 masked_inner_blocks: 0,
355 352 }
356 353 }
357 354
358 355 /// Create from an opaque bunch of bytes
359 356 ///
360 357 /// The created `NodeTreeBytes` from `buffer`,
361 358 /// of which exactly `amount` bytes are used.
362 359 ///
363 360 /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
364 361 /// - `offset` allows for the final file format to include fixed data
365 362 /// (generation number, behavioural flags)
366 363 /// - `amount` is expressed in bytes, and is not automatically derived from
367 364 /// `bytes`, so that a caller that manages them atomically can perform
368 365 /// temporary disk serializations and still rollback easily if needed.
369 366 /// First use-case for this would be to support Mercurial shell hooks.
370 367 ///
371 368 /// panics if `buffer` is smaller than `amount`
372 369 pub fn load_bytes(
373 370 bytes: Box<dyn Deref<Target = [u8]> + Send>,
374 371 amount: usize,
375 372 ) -> Self {
376 373 NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
377 374 }
378 375
379 376 /// Retrieve added `Block` and the original immutable data
380 377 pub fn into_readonly_and_added(
381 378 self,
382 379 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
383 380 let mut vec = self.growable;
384 381 let readonly = self.readonly;
385 382 if readonly.last() != Some(&self.root) {
386 383 vec.push(self.root);
387 384 }
388 385 (readonly, vec)
389 386 }
390 387
391 388 /// Retrieve added `Blocks` as bytes, ready to be written to persistent
392 389 /// storage
393 390 pub fn into_readonly_and_added_bytes(
394 391 self,
395 392 ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
396 393 let (readonly, vec) = self.into_readonly_and_added();
397 394 // Prevent running `v`'s destructor so we are in complete control
398 395 // of the allocation.
399 396 let vec = mem::ManuallyDrop::new(vec);
400 397
401 398 // Transmute the `Vec<Block>` to a `Vec<u8>`. Blocks are contiguous
402 399 // bytes, so this is perfectly safe.
403 400 let bytes = unsafe {
404 401 // Assert that `Block` hasn't been changed and has no padding
405 402 let _: [u8; 4 * BLOCK_SIZE] =
406 403 std::mem::transmute([Block::new(); 4]);
407 404
408 405 // /!\ Any use of `vec` after this is use-after-free.
409 406 // TODO: use `into_raw_parts` once stabilized
410 407 Vec::from_raw_parts(
411 408 vec.as_ptr() as *mut u8,
412 409 vec.len() * BLOCK_SIZE,
413 410 vec.capacity() * BLOCK_SIZE,
414 411 )
415 412 };
416 413 (readonly, bytes)
417 414 }
418 415
419 416 /// Total number of blocks
420 417 fn len(&self) -> usize {
421 418 self.readonly.len() + self.growable.len() + 1
422 419 }
423 420
424 421 /// Implemented for completeness
425 422 ///
426 423 /// A `NodeTree` always has at least the mutable root block.
427 424 #[allow(dead_code)]
428 425 fn is_empty(&self) -> bool {
429 426 false
430 427 }
431 428
432 429 /// Main working method for `NodeTree` searches
433 430 ///
434 431 /// The first returned value is the result of analysing `NodeTree` data
435 432 /// *alone*: whereas `None` guarantees that the given prefix is absent
436 433 /// from the `NodeTree` data (but still could match `NULL_NODE`), with
437 434 /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
438 435 /// that could match the prefix. Actually, all that can be inferred from
439 436 /// the `NodeTree` data is that `rev` is the revision with the longest
440 437 /// common node prefix with the given prefix.
441 438 ///
442 439 /// The second returned value is the size of the smallest subprefix
443 440 /// of `prefix` that would give the same result, i.e. not the
444 441 /// `MultipleResults` error variant (again, using only the data of the
445 442 /// `NodeTree`).
446 443 fn lookup(
447 444 &self,
448 445 prefix: NodePrefixRef,
449 446 ) -> Result<(Option<Revision>, usize), NodeMapError> {
450 447 for (i, visit_item) in self.visit(prefix).enumerate() {
451 448 if let Some(opt) = visit_item.final_revision() {
452 449 return Ok((opt, i + 1));
453 450 }
454 451 }
455 452 Err(NodeMapError::MultipleResults)
456 453 }
457 454
458 455 fn visit<'n, 'p>(
459 456 &'n self,
460 457 prefix: NodePrefixRef<'p>,
461 458 ) -> NodeTreeVisitor<'n, 'p> {
462 459 NodeTreeVisitor {
463 460 nt: self,
464 prefix: prefix,
461 prefix,
465 462 visit: self.len() - 1,
466 463 nybble_idx: 0,
467 464 done: false,
468 465 }
469 466 }
470 467 /// Return a mutable reference for `Block` at index `idx`.
471 468 ///
472 469 /// If `idx` lies in the immutable area, then the reference is to
473 470 /// a newly appended copy.
474 471 ///
475 472 /// Returns (new_idx, glen, mut_ref) where
476 473 ///
477 474 /// - `new_idx` is the index of the mutable `Block`
478 475 /// - `mut_ref` is a mutable reference to the mutable Block.
479 476 /// - `glen` is the new length of `self.growable`
480 477 ///
481 478 /// Note: the caller wouldn't be allowed to query `self.growable.len()`
482 479 /// itself because of the mutable borrow taken with the returned `Block`
483 480 fn mutable_block(&mut self, idx: usize) -> (usize, &mut Block, usize) {
484 481 let ro_blocks = &self.readonly;
485 482 let ro_len = ro_blocks.len();
486 483 let glen = self.growable.len();
487 484 if idx < ro_len {
488 485 self.masked_inner_blocks += 1;
489 // TODO OPTIM I think this makes two copies
490 self.growable.push(ro_blocks[idx].clone());
486 self.growable.push(ro_blocks[idx]);
491 487 (glen + ro_len, &mut self.growable[glen], glen + 1)
492 488 } else if glen + ro_len == idx {
493 489 (idx, &mut self.root, glen)
494 490 } else {
495 491 (idx, &mut self.growable[idx - ro_len], glen)
496 492 }
497 493 }
498 494
499 495 /// Main insertion method
500 496 ///
501 497 /// This will dive in the node tree to find the deepest `Block` for
502 498 /// `node`, split it as much as needed and record `node` in there.
503 499 /// The method then backtracks, updating references in all the visited
504 500 /// blocks from the root.
505 501 ///
506 502 /// All the mutated `Block` are copied first to the growable part if
507 503 /// needed. That happens for those in the immutable part except the root.
508 504 pub fn insert<I: RevlogIndex>(
509 505 &mut self,
510 506 index: &I,
511 507 node: &Node,
512 508 rev: Revision,
513 509 ) -> Result<(), NodeMapError> {
514 510 let ro_len = &self.readonly.len();
515 511
516 512 let mut visit_steps: Vec<_> = self.visit(node.into()).collect();
517 513 let read_nybbles = visit_steps.len();
518 514 // visit_steps cannot be empty, since we always visit the root block
519 515 let deepest = visit_steps.pop().unwrap();
520 516
521 517 let (mut block_idx, mut block, mut glen) =
522 518 self.mutable_block(deepest.block_idx);
523 519
524 520 if let Element::Rev(old_rev) = deepest.element {
525 521 let old_node = index
526 522 .node(old_rev)
527 523 .ok_or_else(|| NodeMapError::RevisionNotInIndex(old_rev))?;
528 524 if old_node == node {
529 525 return Ok(()); // avoid creating lots of useless blocks
530 526 }
531 527
532 528 // Looping over the tail of nybbles in both nodes, creating
533 529 // new blocks until we find the difference
534 530 let mut new_block_idx = ro_len + glen;
535 531 let mut nybble = deepest.nybble;
536 532 for nybble_pos in read_nybbles..node.nybbles_len() {
537 533 block.set(nybble, Element::Block(new_block_idx));
538 534
539 535 let new_nybble = node.get_nybble(nybble_pos);
540 536 let old_nybble = old_node.get_nybble(nybble_pos);
541 537
542 538 if old_nybble == new_nybble {
543 539 self.growable.push(Block::new());
544 540 block = &mut self.growable[glen];
545 541 glen += 1;
546 542 new_block_idx += 1;
547 543 nybble = new_nybble;
548 544 } else {
549 545 let mut new_block = Block::new();
550 546 new_block.set(old_nybble, Element::Rev(old_rev));
551 547 new_block.set(new_nybble, Element::Rev(rev));
552 548 self.growable.push(new_block);
553 549 break;
554 550 }
555 551 }
556 552 } else {
557 553 // Free slot in the deepest block: no splitting has to be done
558 554 block.set(deepest.nybble, Element::Rev(rev));
559 555 }
560 556
561 557 // Backtrack over visit steps to update references
562 558 while let Some(visited) = visit_steps.pop() {
563 559 let to_write = Element::Block(block_idx);
564 560 if visit_steps.is_empty() {
565 561 self.root.set(visited.nybble, to_write);
566 562 break;
567 563 }
568 564 let (new_idx, block, _) = self.mutable_block(visited.block_idx);
569 565 if block.get(visited.nybble) == to_write {
570 566 break;
571 567 }
572 568 block.set(visited.nybble, to_write);
573 569 block_idx = new_idx;
574 570 }
575 571 Ok(())
576 572 }
577 573
578 574 /// Make the whole `NodeTree` logically empty, without touching the
579 575 /// immutable part.
580 576 pub fn invalidate_all(&mut self) {
581 577 self.root = Block::new();
582 578 self.growable = Vec::new();
583 579 self.masked_inner_blocks = self.readonly.len();
584 580 }
585 581
586 582 /// Return the number of blocks in the readonly part that are currently
587 583 /// masked in the mutable part.
588 584 ///
589 585 /// The `NodeTree` structure has no efficient way to know how many blocks
590 586 /// are already unreachable in the readonly part.
591 587 ///
592 588 /// After a call to `invalidate_all()`, the returned number can be actually
593 589 /// bigger than the whole readonly part, a conventional way to mean that
594 590 /// all the readonly blocks have been masked. This is what is really
595 591 /// useful to the caller and does not require to know how many were
596 592 /// actually unreachable to begin with.
597 593 pub fn masked_readonly_blocks(&self) -> usize {
598 594 if let Some(readonly_root) = self.readonly.last() {
599 595 if readonly_root == &self.root {
600 596 return 0;
601 597 }
602 598 } else {
603 599 return 0;
604 600 }
605 601 self.masked_inner_blocks + 1
606 602 }
607 603 }
608 604
609 605 pub struct NodeTreeBytes {
610 606 buffer: Box<dyn Deref<Target = [u8]> + Send>,
611 607 len_in_blocks: usize,
612 608 }
613 609
614 610 impl NodeTreeBytes {
615 611 fn new(
616 612 buffer: Box<dyn Deref<Target = [u8]> + Send>,
617 613 amount: usize,
618 614 ) -> Self {
619 615 assert!(buffer.len() >= amount);
620 616 let len_in_blocks = amount / BLOCK_SIZE;
621 617 NodeTreeBytes {
622 618 buffer,
623 619 len_in_blocks,
624 620 }
625 621 }
626 622 }
627 623
628 624 impl Deref for NodeTreeBytes {
629 625 type Target = [Block];
630 626
631 627 fn deref(&self) -> &[Block] {
632 628 unsafe {
633 629 slice::from_raw_parts(
634 630 (&self.buffer).as_ptr() as *const Block,
635 631 self.len_in_blocks,
636 632 )
637 633 }
638 634 }
639 635 }
640 636
641 637 struct NodeTreeVisitor<'n, 'p> {
642 638 nt: &'n NodeTree,
643 639 prefix: NodePrefixRef<'p>,
644 640 visit: usize,
645 641 nybble_idx: usize,
646 642 done: bool,
647 643 }
648 644
649 645 #[derive(Debug, PartialEq, Clone)]
650 646 struct NodeTreeVisitItem {
651 647 block_idx: usize,
652 648 nybble: u8,
653 649 element: Element,
654 650 }
655 651
656 652 impl<'n, 'p> Iterator for NodeTreeVisitor<'n, 'p> {
657 653 type Item = NodeTreeVisitItem;
658 654
659 655 fn next(&mut self) -> Option<Self::Item> {
660 656 if self.done || self.nybble_idx >= self.prefix.len() {
661 657 return None;
662 658 }
663 659
664 660 let nybble = self.prefix.get_nybble(self.nybble_idx);
665 661 self.nybble_idx += 1;
666 662
667 663 let visit = self.visit;
668 664 let element = self.nt[visit].get(nybble);
669 665 if let Element::Block(idx) = element {
670 666 self.visit = idx;
671 667 } else {
672 668 self.done = true;
673 669 }
674 670
675 671 Some(NodeTreeVisitItem {
676 672 block_idx: visit,
677 nybble: nybble,
678 element: element,
673 nybble,
674 element,
679 675 })
680 676 }
681 677 }
682 678
683 679 impl NodeTreeVisitItem {
684 680 // Return `Some(opt)` if this item is final, with `opt` being the
685 681 // `Revision` that it may represent.
686 682 //
687 683 // If the item is not terminal, return `None`
688 684 fn final_revision(&self) -> Option<Option<Revision>> {
689 685 match self.element {
690 686 Element::Block(_) => None,
691 687 Element::Rev(r) => Some(Some(r)),
692 688 Element::None => Some(None),
693 689 }
694 690 }
695 691 }
696 692
697 693 impl From<Vec<Block>> for NodeTree {
698 694 fn from(vec: Vec<Block>) -> Self {
699 695 Self::new(Box::new(vec))
700 696 }
701 697 }
702 698
703 699 impl fmt::Debug for NodeTree {
704 700 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
705 701 let readonly: &[Block] = &*self.readonly;
706 702 write!(
707 703 f,
708 704 "readonly: {:?}, growable: {:?}, root: {:?}",
709 705 readonly, self.growable, self.root
710 706 )
711 707 }
712 708 }
713 709
714 710 impl Default for NodeTree {
715 711 /// Create a fully mutable empty NodeTree
716 712 fn default() -> Self {
717 713 NodeTree::new(Box::new(Vec::new()))
718 714 }
719 715 }
720 716
721 717 impl NodeMap for NodeTree {
722 718 fn find_bin<'a>(
723 719 &self,
724 720 idx: &impl RevlogIndex,
725 721 prefix: NodePrefixRef<'a>,
726 722 ) -> Result<Option<Revision>, NodeMapError> {
727 723 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
728 724 .map(|(opt, _shortest)| opt)
729 725 }
730 726
731 727 fn unique_prefix_len_bin<'a>(
732 728 &self,
733 729 idx: &impl RevlogIndex,
734 730 prefix: NodePrefixRef<'a>,
735 731 ) -> Result<Option<usize>, NodeMapError> {
736 732 validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
737 733 .map(|(opt, shortest)| opt.map(|_rev| shortest))
738 734 }
739 735 }
740 736
741 737 #[cfg(test)]
742 738 mod tests {
743 739 use super::NodeMapError::*;
744 740 use super::*;
745 741 use crate::revlog::node::{hex_pad_right, Node};
746 742 use std::collections::HashMap;
747 743
748 744 /// Creates a `Block` using a syntax close to the `Debug` output
749 745 macro_rules! block {
750 746 {$($nybble:tt : $variant:ident($val:tt)),*} => (
751 747 {
752 748 let mut block = Block::new();
753 749 $(block.set($nybble, Element::$variant($val)));*;
754 750 block
755 751 }
756 752 )
757 753 }
758 754
759 755 #[test]
760 756 fn test_block_debug() {
761 757 let mut block = Block::new();
762 758 block.set(1, Element::Rev(3));
763 759 block.set(10, Element::Block(0));
764 760 assert_eq!(format!("{:?}", block), "{1: Rev(3), 10: Block(0)}");
765 761 }
766 762
767 763 #[test]
768 764 fn test_block_macro() {
769 765 let block = block! {5: Block(2)};
770 766 assert_eq!(format!("{:?}", block), "{5: Block(2)}");
771 767
772 768 let block = block! {13: Rev(15), 5: Block(2)};
773 769 assert_eq!(format!("{:?}", block), "{5: Block(2), 13: Rev(15)}");
774 770 }
775 771
776 772 #[test]
777 773 fn test_raw_block() {
778 774 let mut raw = [255u8; 64];
779 775
780 776 let mut counter = 0;
781 777 for val in [0, 15, -2, -1, -3].iter() {
782 778 for byte in RawElement::to_be_bytes(*val).iter() {
783 779 raw[counter] = *byte;
784 780 counter += 1;
785 781 }
786 782 }
787 783 let block = Block(raw);
788 784 assert_eq!(block.get(0), Element::Block(0));
789 785 assert_eq!(block.get(1), Element::Block(15));
790 786 assert_eq!(block.get(3), Element::None);
791 787 assert_eq!(block.get(2), Element::Rev(0));
792 788 assert_eq!(block.get(4), Element::Rev(1));
793 789 }
794 790
795 791 type TestIndex = HashMap<Revision, Node>;
796 792
797 793 impl RevlogIndex for TestIndex {
798 794 fn node(&self, rev: Revision) -> Option<&Node> {
799 795 self.get(&rev)
800 796 }
801 797
802 798 fn len(&self) -> usize {
803 799 self.len()
804 800 }
805 801 }
806 802
807 803 /// Pad hexadecimal Node prefix with zeros on the right
808 804 ///
809 805 /// This avoids having to repeatedly write very long hexadecimal
810 806 /// strings for test data, and brings actual hash size independency.
811 807 #[cfg(test)]
812 808 fn pad_node(hex: &str) -> Node {
813 809 Node::from_hex(&hex_pad_right(hex)).unwrap()
814 810 }
815 811
816 812 /// Pad hexadecimal Node prefix with zeros on the right, then insert
817 813 fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) {
818 814 idx.insert(rev, pad_node(hex));
819 815 }
820 816
821 817 fn sample_nodetree() -> NodeTree {
822 818 NodeTree::from(vec![
823 819 block![0: Rev(9)],
824 820 block![0: Rev(0), 1: Rev(9)],
825 821 block![0: Block(1), 1:Rev(1)],
826 822 ])
827 823 }
828 824
829 825 #[test]
830 826 fn test_nt_debug() {
831 827 let nt = sample_nodetree();
832 828 assert_eq!(
833 829 format!("{:?}", nt),
834 830 "readonly: \
835 831 [{0: Rev(9)}, {0: Rev(0), 1: Rev(9)}, {0: Block(1), 1: Rev(1)}], \
836 832 growable: [], \
837 833 root: {0: Block(1), 1: Rev(1)}",
838 834 );
839 835 }
840 836
841 837 #[test]
842 838 fn test_immutable_find_simplest() -> Result<(), NodeMapError> {
843 839 let mut idx: TestIndex = HashMap::new();
844 840 pad_insert(&mut idx, 1, "1234deadcafe");
845 841
846 842 let nt = NodeTree::from(vec![block! {1: Rev(1)}]);
847 843 assert_eq!(nt.find_hex(&idx, "1")?, Some(1));
848 844 assert_eq!(nt.find_hex(&idx, "12")?, Some(1));
849 845 assert_eq!(nt.find_hex(&idx, "1234de")?, Some(1));
850 846 assert_eq!(nt.find_hex(&idx, "1a")?, None);
851 847 assert_eq!(nt.find_hex(&idx, "ab")?, None);
852 848
853 849 // and with full binary Nodes
854 850 assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1));
855 851 let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap();
856 852 assert_eq!(nt.find_node(&idx, &unknown)?, None);
857 853 Ok(())
858 854 }
859 855
860 856 #[test]
861 857 fn test_immutable_find_one_jump() {
862 858 let mut idx = TestIndex::new();
863 859 pad_insert(&mut idx, 9, "012");
864 860 pad_insert(&mut idx, 0, "00a");
865 861
866 862 let nt = sample_nodetree();
867 863
868 864 assert_eq!(nt.find_hex(&idx, "0"), Err(MultipleResults));
869 865 assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
870 866 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
871 867 assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
872 868 assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
873 869 assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
874 870 }
875 871
876 872 #[test]
877 873 fn test_mutated_find() -> Result<(), NodeMapError> {
878 874 let mut idx = TestIndex::new();
879 875 pad_insert(&mut idx, 9, "012");
880 876 pad_insert(&mut idx, 0, "00a");
881 877 pad_insert(&mut idx, 2, "cafe");
882 878 pad_insert(&mut idx, 3, "15");
883 879 pad_insert(&mut idx, 1, "10");
884 880
885 881 let nt = NodeTree {
886 882 readonly: sample_nodetree().readonly,
887 883 growable: vec![block![0: Rev(1), 5: Rev(3)]],
888 884 root: block![0: Block(1), 1:Block(3), 12: Rev(2)],
889 885 masked_inner_blocks: 1,
890 886 };
891 887 assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
892 888 assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
893 889 assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
894 890 assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
895 891 assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
896 892 assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
897 893 assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
898 894 assert_eq!(nt.masked_readonly_blocks(), 2);
899 895 Ok(())
900 896 }
901 897
902 898 struct TestNtIndex {
903 899 index: TestIndex,
904 900 nt: NodeTree,
905 901 }
906 902
907 903 impl TestNtIndex {
908 904 fn new() -> Self {
909 905 TestNtIndex {
910 906 index: HashMap::new(),
911 907 nt: NodeTree::default(),
912 908 }
913 909 }
914 910
915 911 fn insert(
916 912 &mut self,
917 913 rev: Revision,
918 914 hex: &str,
919 915 ) -> Result<(), NodeMapError> {
920 916 let node = pad_node(hex);
921 917 self.index.insert(rev, node.clone());
922 918 self.nt.insert(&self.index, &node, rev)?;
923 919 Ok(())
924 920 }
925 921
926 922 fn find_hex(
927 923 &self,
928 924 prefix: &str,
929 925 ) -> Result<Option<Revision>, NodeMapError> {
930 926 self.nt.find_hex(&self.index, prefix)
931 927 }
932 928
933 929 fn unique_prefix_len_hex(
934 930 &self,
935 931 prefix: &str,
936 932 ) -> Result<Option<usize>, NodeMapError> {
937 933 self.nt.unique_prefix_len_hex(&self.index, prefix)
938 934 }
939 935
940 936 /// Drain `added` and restart a new one
941 937 fn commit(self) -> Self {
942 938 let mut as_vec: Vec<Block> =
943 939 self.nt.readonly.iter().map(|block| block.clone()).collect();
944 940 as_vec.extend(self.nt.growable);
945 941 as_vec.push(self.nt.root);
946 942
947 943 Self {
948 944 index: self.index,
949 945 nt: NodeTree::from(as_vec).into(),
950 946 }
951 947 }
952 948 }
953 949
954 950 #[test]
955 951 fn test_insert_full_mutable() -> Result<(), NodeMapError> {
956 952 let mut idx = TestNtIndex::new();
957 953 idx.insert(0, "1234")?;
958 954 assert_eq!(idx.find_hex("1")?, Some(0));
959 955 assert_eq!(idx.find_hex("12")?, Some(0));
960 956
961 957 // let's trigger a simple split
962 958 idx.insert(1, "1a34")?;
963 959 assert_eq!(idx.nt.growable.len(), 1);
964 960 assert_eq!(idx.find_hex("12")?, Some(0));
965 961 assert_eq!(idx.find_hex("1a")?, Some(1));
966 962
967 963 // reinserting is a no_op
968 964 idx.insert(1, "1a34")?;
969 965 assert_eq!(idx.nt.growable.len(), 1);
970 966 assert_eq!(idx.find_hex("12")?, Some(0));
971 967 assert_eq!(idx.find_hex("1a")?, Some(1));
972 968
973 969 idx.insert(2, "1a01")?;
974 970 assert_eq!(idx.nt.growable.len(), 2);
975 971 assert_eq!(idx.find_hex("1a"), Err(NodeMapError::MultipleResults));
976 972 assert_eq!(idx.find_hex("12")?, Some(0));
977 973 assert_eq!(idx.find_hex("1a3")?, Some(1));
978 974 assert_eq!(idx.find_hex("1a0")?, Some(2));
979 975 assert_eq!(idx.find_hex("1a12")?, None);
980 976
981 977 // now let's make it split and create more than one additional block
982 978 idx.insert(3, "1a345")?;
983 979 assert_eq!(idx.nt.growable.len(), 4);
984 980 assert_eq!(idx.find_hex("1a340")?, Some(1));
985 981 assert_eq!(idx.find_hex("1a345")?, Some(3));
986 982 assert_eq!(idx.find_hex("1a341")?, None);
987 983
988 984 // there's no readonly block to mask
989 985 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
990 986 Ok(())
991 987 }
992 988
993 989 #[test]
994 990 fn test_unique_prefix_len_zero_prefix() {
995 991 let mut idx = TestNtIndex::new();
996 992 idx.insert(0, "00000abcd").unwrap();
997 993
998 994 assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
999 995 // in the nodetree proper, this will be found at the first nybble
1000 996 // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
1001 997 // but the first difference with `NULL_NODE`
1002 998 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1003 999 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1004 1000
1005 1001 // same with odd result
1006 1002 idx.insert(1, "00123").unwrap();
1007 1003 assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
1008 1004 assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
1009 1005
1010 1006 // these are unchanged of course
1011 1007 assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
1012 1008 assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
1013 1009 }
1014 1010
1015 1011 #[test]
1016 1012 fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
1017 1013 // check that the splitting loop is long enough
1018 1014 let mut nt_idx = TestNtIndex::new();
1019 1015 let nt = &mut nt_idx.nt;
1020 1016 let idx = &mut nt_idx.index;
1021 1017
1022 1018 let node0_hex = hex_pad_right("444444");
1023 1019 let mut node1_hex = hex_pad_right("444444").clone();
1024 1020 node1_hex.pop();
1025 1021 node1_hex.push('5');
1026 1022 let node0 = Node::from_hex(&node0_hex).unwrap();
1027 1023 let node1 = Node::from_hex(&node1_hex).unwrap();
1028 1024
1029 1025 idx.insert(0, node0.clone());
1030 1026 nt.insert(idx, &node0, 0)?;
1031 1027 idx.insert(1, node1.clone());
1032 1028 nt.insert(idx, &node1, 1)?;
1033 1029
1034 1030 assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
1035 1031 assert_eq!(nt.find_bin(idx, (&node1).into())?, Some(1));
1036 1032 Ok(())
1037 1033 }
1038 1034
1039 1035 #[test]
1040 1036 fn test_insert_partly_immutable() -> Result<(), NodeMapError> {
1041 1037 let mut idx = TestNtIndex::new();
1042 1038 idx.insert(0, "1234")?;
1043 1039 idx.insert(1, "1235")?;
1044 1040 idx.insert(2, "131")?;
1045 1041 idx.insert(3, "cafe")?;
1046 1042 let mut idx = idx.commit();
1047 1043 assert_eq!(idx.find_hex("1234")?, Some(0));
1048 1044 assert_eq!(idx.find_hex("1235")?, Some(1));
1049 1045 assert_eq!(idx.find_hex("131")?, Some(2));
1050 1046 assert_eq!(idx.find_hex("cafe")?, Some(3));
1051 1047 // we did not add anything since init from readonly
1052 1048 assert_eq!(idx.nt.masked_readonly_blocks(), 0);
1053 1049
1054 1050 idx.insert(4, "123A")?;
1055 1051 assert_eq!(idx.find_hex("1234")?, Some(0));
1056 1052 assert_eq!(idx.find_hex("1235")?, Some(1));
1057 1053 assert_eq!(idx.find_hex("131")?, Some(2));
1058 1054 assert_eq!(idx.find_hex("cafe")?, Some(3));
1059 1055 assert_eq!(idx.find_hex("123A")?, Some(4));
1060 1056 // we masked blocks for all prefixes of "123", including the root
1061 1057 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1062 1058
1063 1059 eprintln!("{:?}", idx.nt);
1064 1060 idx.insert(5, "c0")?;
1065 1061 assert_eq!(idx.find_hex("cafe")?, Some(3));
1066 1062 assert_eq!(idx.find_hex("c0")?, Some(5));
1067 1063 assert_eq!(idx.find_hex("c1")?, None);
1068 1064 assert_eq!(idx.find_hex("1234")?, Some(0));
1069 1065 // inserting "c0" is just splitting the 'c' slot of the mutable root,
1070 1066 // it doesn't mask anything
1071 1067 assert_eq!(idx.nt.masked_readonly_blocks(), 4);
1072 1068
1073 1069 Ok(())
1074 1070 }
1075 1071
1076 1072 #[test]
1077 1073 fn test_invalidate_all() -> Result<(), NodeMapError> {
1078 1074 let mut idx = TestNtIndex::new();
1079 1075 idx.insert(0, "1234")?;
1080 1076 idx.insert(1, "1235")?;
1081 1077 idx.insert(2, "131")?;
1082 1078 idx.insert(3, "cafe")?;
1083 1079 let mut idx = idx.commit();
1084 1080
1085 1081 idx.nt.invalidate_all();
1086 1082
1087 1083 assert_eq!(idx.find_hex("1234")?, None);
1088 1084 assert_eq!(idx.find_hex("1235")?, None);
1089 1085 assert_eq!(idx.find_hex("131")?, None);
1090 1086 assert_eq!(idx.find_hex("cafe")?, None);
1091 1087 // all the readonly blocks have been masked, this is the
1092 1088 // conventional expected response
1093 1089 assert_eq!(idx.nt.masked_readonly_blocks(), idx.nt.readonly.len() + 1);
1094 1090 Ok(())
1095 1091 }
1096 1092
1097 1093 #[test]
1098 1094 fn test_into_added_empty() {
1099 1095 assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
1100 1096 assert!(sample_nodetree()
1101 1097 .into_readonly_and_added_bytes()
1102 1098 .1
1103 1099 .is_empty());
1104 1100 }
1105 1101
1106 1102 #[test]
1107 1103 fn test_into_added_bytes() -> Result<(), NodeMapError> {
1108 1104 let mut idx = TestNtIndex::new();
1109 1105 idx.insert(0, "1234")?;
1110 1106 let mut idx = idx.commit();
1111 1107 idx.insert(4, "cafe")?;
1112 1108 let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
1113 1109
1114 1110 // only the root block has been changed
1115 1111 assert_eq!(bytes.len(), BLOCK_SIZE);
1116 1112 // big endian for -2
1117 1113 assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
1118 1114 // big endian for -6
1119 1115 assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
1120 1116 Ok(())
1121 1117 }
1122 1118 }
@@ -1,168 +1,169 b''
1 1 // utils module
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Contains useful functions, traits, structs, etc. for use in core.
9 9
10 10 use crate::utils::hg_path::HgPath;
11 11 use std::{io::Write, ops::Deref};
12 12
13 13 pub mod files;
14 14 pub mod hg_path;
15 15 pub mod path_auditor;
16 16
17 17 /// Useful until rust/issues/56345 is stable
18 18 ///
19 19 /// # Examples
20 20 ///
21 21 /// ```
22 22 /// use crate::hg::utils::find_slice_in_slice;
23 23 ///
24 24 /// let haystack = b"This is the haystack".to_vec();
25 25 /// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
26 26 /// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
27 27 /// ```
28 28 pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
29 29 where
30 30 for<'a> &'a [T]: PartialEq,
31 31 {
32 32 slice
33 33 .windows(needle.len())
34 34 .position(|window| window == needle)
35 35 }
36 36
37 37 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
38 38 ///
39 39 /// # Examples
40 40 ///
41 41 /// ```
42 42 /// use crate::hg::utils::replace_slice;
43 43 /// let mut line = b"I hate writing tests!".to_vec();
44 44 /// replace_slice(&mut line, b"hate", b"love");
45 45 /// assert_eq!(
46 46 /// line,
47 47 /// b"I love writing tests!".to_vec()
48 48 /// );
49 49 /// ```
50 50 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
51 51 where
52 52 T: Clone + PartialEq,
53 53 {
54 54 if buf.len() < from.len() || from.len() != to.len() {
55 55 return;
56 56 }
57 57 for i in 0..=buf.len() - from.len() {
58 58 if buf[i..].starts_with(from) {
59 59 buf[i..(i + from.len())].clone_from_slice(to);
60 60 }
61 61 }
62 62 }
63 63
64 64 pub trait SliceExt {
65 65 fn trim_end(&self) -> &Self;
66 66 fn trim_start(&self) -> &Self;
67 67 fn trim(&self) -> &Self;
68 68 fn drop_prefix(&self, needle: &Self) -> Option<&Self>;
69 69 }
70 70
71 #[allow(clippy::trivially_copy_pass_by_ref)]
71 72 fn is_not_whitespace(c: &u8) -> bool {
72 73 !(*c as char).is_whitespace()
73 74 }
74 75
75 76 impl SliceExt for [u8] {
76 77 fn trim_end(&self) -> &[u8] {
77 78 if let Some(last) = self.iter().rposition(is_not_whitespace) {
78 &self[..last + 1]
79 &self[..=last]
79 80 } else {
80 81 &[]
81 82 }
82 83 }
83 84 fn trim_start(&self) -> &[u8] {
84 85 if let Some(first) = self.iter().position(is_not_whitespace) {
85 86 &self[first..]
86 87 } else {
87 88 &[]
88 89 }
89 90 }
90 91
91 92 /// ```
92 93 /// use hg::utils::SliceExt;
93 94 /// assert_eq!(
94 95 /// b" to trim ".trim(),
95 96 /// b"to trim"
96 97 /// );
97 98 /// assert_eq!(
98 99 /// b"to trim ".trim(),
99 100 /// b"to trim"
100 101 /// );
101 102 /// assert_eq!(
102 103 /// b" to trim".trim(),
103 104 /// b"to trim"
104 105 /// );
105 106 /// ```
106 107 fn trim(&self) -> &[u8] {
107 108 self.trim_start().trim_end()
108 109 }
109 110
110 111 fn drop_prefix(&self, needle: &Self) -> Option<&Self> {
111 112 if self.starts_with(needle) {
112 113 Some(&self[needle.len()..])
113 114 } else {
114 115 None
115 116 }
116 117 }
117 118 }
118 119
119 120 pub trait Escaped {
120 121 /// Return bytes escaped for display to the user
121 122 fn escaped_bytes(&self) -> Vec<u8>;
122 123 }
123 124
124 125 impl Escaped for u8 {
125 126 fn escaped_bytes(&self) -> Vec<u8> {
126 127 let mut acc = vec![];
127 128 match self {
128 129 c @ b'\'' | c @ b'\\' => {
129 130 acc.push(b'\\');
130 131 acc.push(*c);
131 132 }
132 133 b'\t' => {
133 134 acc.extend(br"\\t");
134 135 }
135 136 b'\n' => {
136 137 acc.extend(br"\\n");
137 138 }
138 139 b'\r' => {
139 140 acc.extend(br"\\r");
140 141 }
141 142 c if (*c < b' ' || *c >= 127) => {
142 143 write!(acc, "\\x{:x}", self).unwrap();
143 144 }
144 145 c => {
145 146 acc.push(*c);
146 147 }
147 148 }
148 149 acc
149 150 }
150 151 }
151 152
152 153 impl<'a, T: Escaped> Escaped for &'a [T] {
153 154 fn escaped_bytes(&self) -> Vec<u8> {
154 self.iter().flat_map(|item| item.escaped_bytes()).collect()
155 self.iter().flat_map(Escaped::escaped_bytes).collect()
155 156 }
156 157 }
157 158
158 159 impl<T: Escaped> Escaped for Vec<T> {
159 160 fn escaped_bytes(&self) -> Vec<u8> {
160 161 self.deref().escaped_bytes()
161 162 }
162 163 }
163 164
164 165 impl<'a> Escaped for &'a HgPath {
165 166 fn escaped_bytes(&self) -> Vec<u8> {
166 167 self.as_bytes().escaped_bytes()
167 168 }
168 169 }
@@ -1,384 +1,382 b''
1 1 // files.rs
2 2 //
3 3 // Copyright 2019
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 // Yuya Nishihara <yuya@tcha.org>
6 6 //
7 7 // This software may be used and distributed according to the terms of the
8 8 // GNU General Public License version 2 or any later version.
9 9
10 10 //! Functions for fiddling with files.
11 11
12 12 use crate::utils::{
13 13 hg_path::{path_to_hg_path_buf, HgPath, HgPathBuf, HgPathError},
14 14 path_auditor::PathAuditor,
15 15 replace_slice,
16 16 };
17 17 use lazy_static::lazy_static;
18 18 use same_file::is_same_file;
19 19 use std::borrow::ToOwned;
20 20 use std::fs::Metadata;
21 21 use std::iter::FusedIterator;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24
25 25 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
26 26 let os_str;
27 27 #[cfg(unix)]
28 28 {
29 29 use std::os::unix::ffi::OsStrExt;
30 30 os_str = std::ffi::OsStr::from_bytes(bytes);
31 31 }
32 32 // TODO Handle other platforms
33 33 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
34 34 // Perhaps, the return type would have to be Result<PathBuf>.
35 35
36 36 Path::new(os_str)
37 37 }
38 38
39 39 // TODO: need to convert from WTF8 to MBCS bytes on Windows.
40 40 // that's why Vec<u8> is returned.
41 41 #[cfg(unix)]
42 42 pub fn get_bytes_from_path(path: impl AsRef<Path>) -> Vec<u8> {
43 43 use std::os::unix::ffi::OsStrExt;
44 44 path.as_ref().as_os_str().as_bytes().to_vec()
45 45 }
46 46
47 47 /// An iterator over repository path yielding itself and its ancestors.
48 48 #[derive(Copy, Clone, Debug)]
49 49 pub struct Ancestors<'a> {
50 50 next: Option<&'a HgPath>,
51 51 }
52 52
53 53 impl<'a> Iterator for Ancestors<'a> {
54 54 type Item = &'a HgPath;
55 55
56 56 fn next(&mut self) -> Option<Self::Item> {
57 57 let next = self.next;
58 58 self.next = match self.next {
59 59 Some(s) if s.is_empty() => None,
60 60 Some(s) => {
61 61 let p = s.bytes().rposition(|c| *c == b'/').unwrap_or(0);
62 62 Some(HgPath::new(&s.as_bytes()[..p]))
63 63 }
64 64 None => None,
65 65 };
66 66 next
67 67 }
68 68 }
69 69
70 70 impl<'a> FusedIterator for Ancestors<'a> {}
71 71
72 72 /// An iterator over repository path yielding itself and its ancestors.
73 73 #[derive(Copy, Clone, Debug)]
74 74 pub(crate) struct AncestorsWithBase<'a> {
75 75 next: Option<(&'a HgPath, &'a HgPath)>,
76 76 }
77 77
78 78 impl<'a> Iterator for AncestorsWithBase<'a> {
79 79 type Item = (&'a HgPath, &'a HgPath);
80 80
81 81 fn next(&mut self) -> Option<Self::Item> {
82 82 let next = self.next;
83 83 self.next = match self.next {
84 84 Some((s, _)) if s.is_empty() => None,
85 85 Some((s, _)) => Some(s.split_filename()),
86 86 None => None,
87 87 };
88 88 next
89 89 }
90 90 }
91 91
92 92 impl<'a> FusedIterator for AncestorsWithBase<'a> {}
93 93
94 94 /// Returns an iterator yielding ancestor directories of the given repository
95 95 /// path.
96 96 ///
97 97 /// The path is separated by '/', and must not start with '/'.
98 98 ///
99 99 /// The path itself isn't included unless it is b"" (meaning the root
100 100 /// directory.)
101 pub fn find_dirs<'a>(path: &'a HgPath) -> Ancestors<'a> {
101 pub fn find_dirs(path: &HgPath) -> Ancestors {
102 102 let mut dirs = Ancestors { next: Some(path) };
103 103 if !path.is_empty() {
104 104 dirs.next(); // skip itself
105 105 }
106 106 dirs
107 107 }
108 108
109 109 /// Returns an iterator yielding ancestor directories of the given repository
110 110 /// path.
111 111 ///
112 112 /// The path is separated by '/', and must not start with '/'.
113 113 ///
114 114 /// The path itself isn't included unless it is b"" (meaning the root
115 115 /// directory.)
116 pub(crate) fn find_dirs_with_base<'a>(
117 path: &'a HgPath,
118 ) -> AncestorsWithBase<'a> {
116 pub(crate) fn find_dirs_with_base(path: &HgPath) -> AncestorsWithBase {
119 117 let mut dirs = AncestorsWithBase {
120 118 next: Some((path, HgPath::new(b""))),
121 119 };
122 120 if !path.is_empty() {
123 121 dirs.next(); // skip itself
124 122 }
125 123 dirs
126 124 }
127 125
128 126 /// TODO more than ASCII?
129 127 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
130 128 #[cfg(windows)] // NTFS compares via upper()
131 129 return path.to_ascii_uppercase();
132 130 #[cfg(unix)]
133 131 path.to_ascii_lowercase()
134 132 }
135 133
136 134 lazy_static! {
137 135 static ref IGNORED_CHARS: Vec<Vec<u8>> = {
138 136 [
139 137 0x200c, 0x200d, 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d,
140 138 0x202e, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
141 139 ]
142 140 .iter()
143 141 .map(|code| {
144 142 std::char::from_u32(*code)
145 143 .unwrap()
146 144 .encode_utf8(&mut [0; 3])
147 145 .bytes()
148 146 .collect()
149 147 })
150 148 .collect()
151 149 };
152 150 }
153 151
154 152 fn hfs_ignore_clean(bytes: &[u8]) -> Vec<u8> {
155 153 let mut buf = bytes.to_owned();
156 154 let needs_escaping = bytes.iter().any(|b| *b == b'\xe2' || *b == b'\xef');
157 155 if needs_escaping {
158 156 for forbidden in IGNORED_CHARS.iter() {
159 157 replace_slice(&mut buf, forbidden, &[])
160 158 }
161 159 buf
162 160 } else {
163 161 buf
164 162 }
165 163 }
166 164
167 165 pub fn lower_clean(bytes: &[u8]) -> Vec<u8> {
168 166 hfs_ignore_clean(&bytes.to_ascii_lowercase())
169 167 }
170 168
171 169 #[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
172 170 pub struct HgMetadata {
173 171 pub st_dev: u64,
174 172 pub st_mode: u32,
175 173 pub st_nlink: u64,
176 174 pub st_size: u64,
177 175 pub st_mtime: i64,
178 176 pub st_ctime: i64,
179 177 }
180 178
181 179 // TODO support other plaforms
182 180 #[cfg(unix)]
183 181 impl HgMetadata {
184 182 pub fn from_metadata(metadata: Metadata) -> Self {
185 183 use std::os::unix::fs::MetadataExt;
186 184 Self {
187 185 st_dev: metadata.dev(),
188 186 st_mode: metadata.mode(),
189 187 st_nlink: metadata.nlink(),
190 188 st_size: metadata.size(),
191 189 st_mtime: metadata.mtime(),
192 190 st_ctime: metadata.ctime(),
193 191 }
194 192 }
195 193 }
196 194
197 195 /// Returns the canonical path of `name`, given `cwd` and `root`
198 196 pub fn canonical_path(
199 197 root: impl AsRef<Path>,
200 198 cwd: impl AsRef<Path>,
201 199 name: impl AsRef<Path>,
202 200 ) -> Result<PathBuf, HgPathError> {
203 201 // TODO add missing normalization for other platforms
204 202 let root = root.as_ref();
205 203 let cwd = cwd.as_ref();
206 204 let name = name.as_ref();
207 205
208 206 let name = if !name.is_absolute() {
209 207 root.join(&cwd).join(&name)
210 208 } else {
211 209 name.to_owned()
212 210 };
213 211 let auditor = PathAuditor::new(&root);
214 212 if name != root && name.starts_with(&root) {
215 213 let name = name.strip_prefix(&root).unwrap();
216 214 auditor.audit_path(path_to_hg_path_buf(name)?)?;
217 return Ok(name.to_owned());
215 Ok(name.to_owned())
218 216 } else if name == root {
219 return Ok("".into());
217 Ok("".into())
220 218 } else {
221 219 // Determine whether `name' is in the hierarchy at or beneath `root',
222 220 // by iterating name=name.parent() until it returns `None` (can't
223 221 // check name == '/', because that doesn't work on windows).
224 222 let mut name = name.deref();
225 223 let original_name = name.to_owned();
226 224 loop {
227 225 let same = is_same_file(&name, &root).unwrap_or(false);
228 226 if same {
229 227 if name == original_name {
230 228 // `name` was actually the same as root (maybe a symlink)
231 229 return Ok("".into());
232 230 }
233 231 // `name` is a symlink to root, so `original_name` is under
234 232 // root
235 233 let rel_path = original_name.strip_prefix(&name).unwrap();
236 234 auditor.audit_path(path_to_hg_path_buf(&rel_path)?)?;
237 235 return Ok(rel_path.to_owned());
238 236 }
239 237 name = match name.parent() {
240 238 None => break,
241 239 Some(p) => p,
242 240 };
243 241 }
244 242 // TODO hint to the user about using --cwd
245 243 // Bubble up the responsibility to Python for now
246 244 Err(HgPathError::NotUnderRoot {
247 245 path: original_name.to_owned(),
248 246 root: root.to_owned(),
249 247 })
250 248 }
251 249 }
252 250
253 251 #[cfg(test)]
254 252 mod tests {
255 253 use super::*;
256 254 use pretty_assertions::assert_eq;
257 255
258 256 #[test]
259 257 fn find_dirs_some() {
260 258 let mut dirs = super::find_dirs(HgPath::new(b"foo/bar/baz"));
261 259 assert_eq!(dirs.next(), Some(HgPath::new(b"foo/bar")));
262 260 assert_eq!(dirs.next(), Some(HgPath::new(b"foo")));
263 261 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
264 262 assert_eq!(dirs.next(), None);
265 263 assert_eq!(dirs.next(), None);
266 264 }
267 265
268 266 #[test]
269 267 fn find_dirs_empty() {
270 268 // looks weird, but mercurial.pathutil.finddirs(b"") yields b""
271 269 let mut dirs = super::find_dirs(HgPath::new(b""));
272 270 assert_eq!(dirs.next(), Some(HgPath::new(b"")));
273 271 assert_eq!(dirs.next(), None);
274 272 assert_eq!(dirs.next(), None);
275 273 }
276 274
277 275 #[test]
278 276 fn test_find_dirs_with_base_some() {
279 277 let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
280 278 assert_eq!(
281 279 dirs.next(),
282 280 Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
283 281 );
284 282 assert_eq!(
285 283 dirs.next(),
286 284 Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
287 285 );
288 286 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
289 287 assert_eq!(dirs.next(), None);
290 288 assert_eq!(dirs.next(), None);
291 289 }
292 290
293 291 #[test]
294 292 fn test_find_dirs_with_base_empty() {
295 293 let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
296 294 assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
297 295 assert_eq!(dirs.next(), None);
298 296 assert_eq!(dirs.next(), None);
299 297 }
300 298
301 299 #[test]
302 300 fn test_canonical_path() {
303 301 let root = Path::new("/repo");
304 302 let cwd = Path::new("/dir");
305 303 let name = Path::new("filename");
306 304 assert_eq!(
307 305 canonical_path(root, cwd, name),
308 306 Err(HgPathError::NotUnderRoot {
309 307 path: PathBuf::from("/dir/filename"),
310 308 root: root.to_path_buf()
311 309 })
312 310 );
313 311
314 312 let root = Path::new("/repo");
315 313 let cwd = Path::new("/");
316 314 let name = Path::new("filename");
317 315 assert_eq!(
318 316 canonical_path(root, cwd, name),
319 317 Err(HgPathError::NotUnderRoot {
320 318 path: PathBuf::from("/filename"),
321 319 root: root.to_path_buf()
322 320 })
323 321 );
324 322
325 323 let root = Path::new("/repo");
326 324 let cwd = Path::new("/");
327 325 let name = Path::new("repo/filename");
328 326 assert_eq!(
329 327 canonical_path(root, cwd, name),
330 328 Ok(PathBuf::from("filename"))
331 329 );
332 330
333 331 let root = Path::new("/repo");
334 332 let cwd = Path::new("/repo");
335 333 let name = Path::new("filename");
336 334 assert_eq!(
337 335 canonical_path(root, cwd, name),
338 336 Ok(PathBuf::from("filename"))
339 337 );
340 338
341 339 let root = Path::new("/repo");
342 340 let cwd = Path::new("/repo/subdir");
343 341 let name = Path::new("filename");
344 342 assert_eq!(
345 343 canonical_path(root, cwd, name),
346 344 Ok(PathBuf::from("subdir/filename"))
347 345 );
348 346 }
349 347
350 348 #[test]
351 349 fn test_canonical_path_not_rooted() {
352 350 use std::fs::create_dir;
353 351 use tempfile::tempdir;
354 352
355 353 let base_dir = tempdir().unwrap();
356 354 let base_dir_path = base_dir.path();
357 355 let beneath_repo = base_dir_path.join("a");
358 356 let root = base_dir_path.join("a/b");
359 357 let out_of_repo = base_dir_path.join("c");
360 358 let under_repo_symlink = out_of_repo.join("d");
361 359
362 360 create_dir(&beneath_repo).unwrap();
363 361 create_dir(&root).unwrap();
364 362
365 363 // TODO make portable
366 364 std::os::unix::fs::symlink(&root, &out_of_repo).unwrap();
367 365
368 366 assert_eq!(
369 367 canonical_path(&root, Path::new(""), out_of_repo),
370 368 Ok(PathBuf::from(""))
371 369 );
372 370 assert_eq!(
373 371 canonical_path(&root, Path::new(""), &beneath_repo),
374 372 Err(HgPathError::NotUnderRoot {
375 373 path: beneath_repo.to_owned(),
376 374 root: root.to_owned()
377 375 })
378 376 );
379 377 assert_eq!(
380 378 canonical_path(&root, Path::new(""), &under_repo_symlink),
381 379 Ok(PathBuf::from("d"))
382 380 );
383 381 }
384 382 }
@@ -1,768 +1,765 b''
1 1 // hg_path.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use std::borrow::Borrow;
9 9 use std::ffi::{OsStr, OsString};
10 10 use std::fmt;
11 11 use std::ops::Deref;
12 12 use std::path::{Path, PathBuf};
13 13
14 14 #[derive(Debug, Eq, PartialEq)]
15 15 pub enum HgPathError {
16 16 /// Bytes from the invalid `HgPath`
17 17 LeadingSlash(Vec<u8>),
18 18 ConsecutiveSlashes {
19 19 bytes: Vec<u8>,
20 20 second_slash_index: usize,
21 21 },
22 22 ContainsNullByte {
23 23 bytes: Vec<u8>,
24 24 null_byte_index: usize,
25 25 },
26 26 /// Bytes
27 27 DecodeError(Vec<u8>),
28 28 /// The rest come from audit errors
29 29 EndsWithSlash(HgPathBuf),
30 30 ContainsIllegalComponent(HgPathBuf),
31 31 /// Path is inside the `.hg` folder
32 32 InsideDotHg(HgPathBuf),
33 33 IsInsideNestedRepo {
34 34 path: HgPathBuf,
35 35 nested_repo: HgPathBuf,
36 36 },
37 37 TraversesSymbolicLink {
38 38 path: HgPathBuf,
39 39 symlink: HgPathBuf,
40 40 },
41 41 NotFsCompliant(HgPathBuf),
42 42 /// `path` is the smallest invalid path
43 43 NotUnderRoot {
44 44 path: PathBuf,
45 45 root: PathBuf,
46 46 },
47 47 }
48 48
49 49 impl ToString for HgPathError {
50 50 fn to_string(&self) -> String {
51 51 match self {
52 52 HgPathError::LeadingSlash(bytes) => {
53 53 format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
54 54 }
55 55 HgPathError::ConsecutiveSlashes {
56 56 bytes,
57 57 second_slash_index: pos,
58 58 } => format!(
59 59 "Invalid HgPath '{:?}': consecutive slashes at pos {}.",
60 60 bytes, pos
61 61 ),
62 62 HgPathError::ContainsNullByte {
63 63 bytes,
64 64 null_byte_index: pos,
65 65 } => format!(
66 66 "Invalid HgPath '{:?}': contains null byte at pos {}.",
67 67 bytes, pos
68 68 ),
69 69 HgPathError::DecodeError(bytes) => {
70 70 format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
71 71 }
72 72 HgPathError::EndsWithSlash(path) => {
73 73 format!("Audit failed for '{}': ends with a slash.", path)
74 74 }
75 75 HgPathError::ContainsIllegalComponent(path) => format!(
76 76 "Audit failed for '{}': contains an illegal component.",
77 77 path
78 78 ),
79 79 HgPathError::InsideDotHg(path) => format!(
80 80 "Audit failed for '{}': is inside the '.hg' folder.",
81 81 path
82 82 ),
83 83 HgPathError::IsInsideNestedRepo {
84 84 path,
85 85 nested_repo: nested,
86 86 } => format!(
87 87 "Audit failed for '{}': is inside a nested repository '{}'.",
88 88 path, nested
89 89 ),
90 90 HgPathError::TraversesSymbolicLink { path, symlink } => format!(
91 91 "Audit failed for '{}': traverses symbolic link '{}'.",
92 92 path, symlink
93 93 ),
94 94 HgPathError::NotFsCompliant(path) => format!(
95 95 "Audit failed for '{}': cannot be turned into a \
96 96 filesystem path.",
97 97 path
98 98 ),
99 99 HgPathError::NotUnderRoot { path, root } => format!(
100 100 "Audit failed for '{}': not under root {}.",
101 101 path.display(),
102 102 root.display()
103 103 ),
104 104 }
105 105 }
106 106 }
107 107
108 108 impl From<HgPathError> for std::io::Error {
109 109 fn from(e: HgPathError) -> Self {
110 110 std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
111 111 }
112 112 }
113 113
114 114 /// This is a repository-relative path (or canonical path):
115 115 /// - no null characters
116 116 /// - `/` separates directories
117 117 /// - no consecutive slashes
118 118 /// - no leading slash,
119 119 /// - no `.` nor `..` of special meaning
120 120 /// - stored in repository and shared across platforms
121 121 ///
122 122 /// Note: there is no guarantee of any `HgPath` being well-formed at any point
123 123 /// in its lifetime for performance reasons and to ease ergonomics. It is
124 124 /// however checked using the `check_state` method before any file-system
125 125 /// operation.
126 126 ///
127 127 /// This allows us to be encoding-transparent as much as possible, until really
128 128 /// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
129 129 /// or `Path`) whenever more complex operations are needed:
130 130 /// On Unix, it's just byte-to-byte conversion. On Windows, it has to be
131 131 /// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
132 132 /// character encoding will be determined on a per-repository basis.
133 133 //
134 134 // FIXME: (adapted from a comment in the stdlib)
135 135 // `HgPath::new()` current implementation relies on `Slice` being
136 136 // layout-compatible with `[u8]`.
137 137 // When attribute privacy is implemented, `Slice` should be annotated as
138 138 // `#[repr(transparent)]`.
139 139 // Anyway, `Slice` representation and layout are considered implementation
140 140 // detail, are not documented and must not be relied upon.
141 141 #[derive(Eq, Ord, PartialEq, PartialOrd, Hash)]
142 142 pub struct HgPath {
143 143 inner: [u8],
144 144 }
145 145
146 146 impl HgPath {
147 147 pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
148 148 unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
149 149 }
150 150 pub fn is_empty(&self) -> bool {
151 151 self.inner.is_empty()
152 152 }
153 153 pub fn len(&self) -> usize {
154 154 self.inner.len()
155 155 }
156 156 fn to_hg_path_buf(&self) -> HgPathBuf {
157 157 HgPathBuf {
158 158 inner: self.inner.to_owned(),
159 159 }
160 160 }
161 161 pub fn bytes(&self) -> std::slice::Iter<u8> {
162 162 self.inner.iter()
163 163 }
164 164 pub fn to_ascii_uppercase(&self) -> HgPathBuf {
165 165 HgPathBuf::from(self.inner.to_ascii_uppercase())
166 166 }
167 167 pub fn to_ascii_lowercase(&self) -> HgPathBuf {
168 168 HgPathBuf::from(self.inner.to_ascii_lowercase())
169 169 }
170 170 pub fn as_bytes(&self) -> &[u8] {
171 171 &self.inner
172 172 }
173 173 pub fn contains(&self, other: u8) -> bool {
174 174 self.inner.contains(&other)
175 175 }
176 176 pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
177 177 self.inner.starts_with(needle.as_ref().as_bytes())
178 178 }
179 179 pub fn trim_trailing_slash(&self) -> &Self {
180 180 Self::new(if self.inner.last() == Some(&b'/') {
181 181 &self.inner[..self.inner.len() - 1]
182 182 } else {
183 183 &self.inner[..]
184 184 })
185 185 }
186 186 /// Returns a tuple of slices `(base, filename)` resulting from the split
187 187 /// at the rightmost `/`, if any.
188 188 ///
189 189 /// # Examples:
190 190 ///
191 191 /// ```
192 192 /// use hg::utils::hg_path::HgPath;
193 193 ///
194 194 /// let path = HgPath::new(b"cool/hg/path").split_filename();
195 195 /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
196 196 ///
197 197 /// let path = HgPath::new(b"pathwithoutsep").split_filename();
198 198 /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
199 199 /// ```
200 200 pub fn split_filename(&self) -> (&Self, &Self) {
201 201 match &self.inner.iter().rposition(|c| *c == b'/') {
202 202 None => (HgPath::new(""), &self),
203 203 Some(size) => (
204 204 HgPath::new(&self.inner[..*size]),
205 205 HgPath::new(&self.inner[*size + 1..]),
206 206 ),
207 207 }
208 208 }
209 209 pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
210 210 let mut inner = self.inner.to_owned();
211 if inner.len() != 0 && inner.last() != Some(&b'/') {
211 if !inner.is_empty() && inner.last() != Some(&b'/') {
212 212 inner.push(b'/');
213 213 }
214 214 inner.extend(other.as_ref().bytes());
215 215 HgPathBuf::from_bytes(&inner)
216 216 }
217 217 pub fn parent(&self) -> &Self {
218 218 let inner = self.as_bytes();
219 219 HgPath::new(match inner.iter().rposition(|b| *b == b'/') {
220 220 Some(pos) => &inner[..pos],
221 221 None => &[],
222 222 })
223 223 }
224 224 /// Given a base directory, returns the slice of `self` relative to the
225 225 /// base directory. If `base` is not a directory (does not end with a
226 226 /// `b'/'`), returns `None`.
227 227 pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
228 228 let base = base.as_ref();
229 229 if base.is_empty() {
230 230 return Some(self);
231 231 }
232 232 let is_dir = base.as_bytes().ends_with(b"/");
233 233 if is_dir && self.starts_with(base) {
234 234 Some(Self::new(&self.inner[base.len()..]))
235 235 } else {
236 236 None
237 237 }
238 238 }
239 239
240 240 #[cfg(windows)]
241 241 /// Copied from the Python stdlib's `os.path.splitdrive` implementation.
242 242 ///
243 243 /// Split a pathname into drive/UNC sharepoint and relative path
244 244 /// specifiers. Returns a 2-tuple (drive_or_unc, path); either part may
245 245 /// be empty.
246 246 ///
247 247 /// If you assign
248 248 /// result = split_drive(p)
249 249 /// It is always true that:
250 250 /// result[0] + result[1] == p
251 251 ///
252 252 /// If the path contained a drive letter, drive_or_unc will contain
253 253 /// everything up to and including the colon.
254 254 /// e.g. split_drive("c:/dir") returns ("c:", "/dir")
255 255 ///
256 256 /// If the path contained a UNC path, the drive_or_unc will contain the
257 257 /// host name and share up to but not including the fourth directory
258 258 /// separator character.
259 259 /// e.g. split_drive("//host/computer/dir") returns ("//host/computer",
260 260 /// "/dir")
261 261 ///
262 262 /// Paths cannot contain both a drive letter and a UNC path.
263 263 pub fn split_drive<'a>(&self) -> (&HgPath, &HgPath) {
264 264 let bytes = self.as_bytes();
265 265 let is_sep = |b| std::path::is_separator(b as char);
266 266
267 267 if self.len() < 2 {
268 268 (HgPath::new(b""), &self)
269 269 } else if is_sep(bytes[0])
270 270 && is_sep(bytes[1])
271 271 && (self.len() == 2 || !is_sep(bytes[2]))
272 272 {
273 273 // Is a UNC path:
274 274 // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
275 275 // \\machine\mountpoint\directory\etc\...
276 276 // directory ^^^^^^^^^^^^^^^
277 277
278 278 let machine_end_index = bytes[2..].iter().position(|b| is_sep(*b));
279 279 let mountpoint_start_index = if let Some(i) = machine_end_index {
280 280 i + 2
281 281 } else {
282 282 return (HgPath::new(b""), &self);
283 283 };
284 284
285 285 match bytes[mountpoint_start_index + 1..]
286 286 .iter()
287 287 .position(|b| is_sep(*b))
288 288 {
289 289 // A UNC path can't have two slashes in a row
290 290 // (after the initial two)
291 291 Some(0) => (HgPath::new(b""), &self),
292 292 Some(i) => {
293 293 let (a, b) =
294 294 bytes.split_at(mountpoint_start_index + 1 + i);
295 295 (HgPath::new(a), HgPath::new(b))
296 296 }
297 297 None => (&self, HgPath::new(b"")),
298 298 }
299 299 } else if bytes[1] == b':' {
300 300 // Drive path c:\directory
301 301 let (a, b) = bytes.split_at(2);
302 302 (HgPath::new(a), HgPath::new(b))
303 303 } else {
304 304 (HgPath::new(b""), &self)
305 305 }
306 306 }
307 307
308 308 #[cfg(unix)]
309 309 /// Split a pathname into drive and path. On Posix, drive is always empty.
310 310 pub fn split_drive(&self) -> (&HgPath, &HgPath) {
311 311 (HgPath::new(b""), &self)
312 312 }
313 313
314 314 /// Checks for errors in the path, short-circuiting at the first one.
315 315 /// This generates fine-grained errors useful for debugging.
316 316 /// To simply check if the path is valid during tests, use `is_valid`.
317 317 pub fn check_state(&self) -> Result<(), HgPathError> {
318 if self.len() == 0 {
318 if self.is_empty() {
319 319 return Ok(());
320 320 }
321 321 let bytes = self.as_bytes();
322 322 let mut previous_byte = None;
323 323
324 324 if bytes[0] == b'/' {
325 325 return Err(HgPathError::LeadingSlash(bytes.to_vec()));
326 326 }
327 327 for (index, byte) in bytes.iter().enumerate() {
328 328 match byte {
329 329 0 => {
330 330 return Err(HgPathError::ContainsNullByte {
331 331 bytes: bytes.to_vec(),
332 332 null_byte_index: index,
333 333 })
334 334 }
335 335 b'/' => {
336 336 if previous_byte.is_some() && previous_byte == Some(b'/') {
337 337 return Err(HgPathError::ConsecutiveSlashes {
338 338 bytes: bytes.to_vec(),
339 339 second_slash_index: index,
340 340 });
341 341 }
342 342 }
343 343 _ => (),
344 344 };
345 345 previous_byte = Some(*byte);
346 346 }
347 347 Ok(())
348 348 }
349 349
350 350 #[cfg(test)]
351 351 /// Only usable during tests to force developers to handle invalid states
352 352 fn is_valid(&self) -> bool {
353 353 self.check_state().is_ok()
354 354 }
355 355 }
356 356
357 357 impl fmt::Debug for HgPath {
358 358 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
359 359 write!(f, "HgPath({:?})", String::from_utf8_lossy(&self.inner))
360 360 }
361 361 }
362 362
363 363 impl fmt::Display for HgPath {
364 364 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
365 365 write!(f, "{}", String::from_utf8_lossy(&self.inner))
366 366 }
367 367 }
368 368
369 #[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
369 #[derive(Default, Eq, Ord, Clone, PartialEq, PartialOrd, Hash)]
370 370 pub struct HgPathBuf {
371 371 inner: Vec<u8>,
372 372 }
373 373
374 374 impl HgPathBuf {
375 375 pub fn new() -> Self {
376 Self { inner: Vec::new() }
376 Default::default()
377 377 }
378 378 pub fn push(&mut self, byte: u8) {
379 379 self.inner.push(byte);
380 380 }
381 381 pub fn from_bytes(s: &[u8]) -> HgPathBuf {
382 382 HgPath::new(s).to_owned()
383 383 }
384 384 pub fn into_vec(self) -> Vec<u8> {
385 385 self.inner
386 386 }
387 pub fn as_ref(&self) -> &[u8] {
388 self.inner.as_ref()
389 }
390 387 }
391 388
392 389 impl fmt::Debug for HgPathBuf {
393 390 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
394 391 write!(f, "HgPathBuf({:?})", String::from_utf8_lossy(&self.inner))
395 392 }
396 393 }
397 394
398 395 impl fmt::Display for HgPathBuf {
399 396 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
400 397 write!(f, "{}", String::from_utf8_lossy(&self.inner))
401 398 }
402 399 }
403 400
404 401 impl Deref for HgPathBuf {
405 402 type Target = HgPath;
406 403
407 404 #[inline]
408 405 fn deref(&self) -> &HgPath {
409 406 &HgPath::new(&self.inner)
410 407 }
411 408 }
412 409
413 410 impl From<Vec<u8>> for HgPathBuf {
414 411 fn from(vec: Vec<u8>) -> Self {
415 412 Self { inner: vec }
416 413 }
417 414 }
418 415
419 416 impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
420 417 fn from(s: &T) -> HgPathBuf {
421 418 s.as_ref().to_owned()
422 419 }
423 420 }
424 421
425 422 impl Into<Vec<u8>> for HgPathBuf {
426 423 fn into(self) -> Vec<u8> {
427 424 self.inner
428 425 }
429 426 }
430 427
431 428 impl Borrow<HgPath> for HgPathBuf {
432 429 fn borrow(&self) -> &HgPath {
433 430 &HgPath::new(self.as_bytes())
434 431 }
435 432 }
436 433
437 434 impl ToOwned for HgPath {
438 435 type Owned = HgPathBuf;
439 436
440 437 fn to_owned(&self) -> HgPathBuf {
441 438 self.to_hg_path_buf()
442 439 }
443 440 }
444 441
445 442 impl AsRef<HgPath> for HgPath {
446 443 fn as_ref(&self) -> &HgPath {
447 444 self
448 445 }
449 446 }
450 447
451 448 impl AsRef<HgPath> for HgPathBuf {
452 449 fn as_ref(&self) -> &HgPath {
453 450 self
454 451 }
455 452 }
456 453
457 454 impl Extend<u8> for HgPathBuf {
458 455 fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
459 456 self.inner.extend(iter);
460 457 }
461 458 }
462 459
463 460 /// TODO: Once https://www.mercurial-scm.org/wiki/WindowsUTF8Plan is
464 461 /// implemented, these conversion utils will have to work differently depending
465 462 /// on the repository encoding: either `UTF-8` or `MBCS`.
466 463
467 464 pub fn hg_path_to_os_string<P: AsRef<HgPath>>(
468 465 hg_path: P,
469 466 ) -> Result<OsString, HgPathError> {
470 467 hg_path.as_ref().check_state()?;
471 468 let os_str;
472 469 #[cfg(unix)]
473 470 {
474 471 use std::os::unix::ffi::OsStrExt;
475 472 os_str = std::ffi::OsStr::from_bytes(&hg_path.as_ref().as_bytes());
476 473 }
477 474 // TODO Handle other platforms
478 475 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
479 476 Ok(os_str.to_os_string())
480 477 }
481 478
482 479 pub fn hg_path_to_path_buf<P: AsRef<HgPath>>(
483 480 hg_path: P,
484 481 ) -> Result<PathBuf, HgPathError> {
485 482 Ok(Path::new(&hg_path_to_os_string(hg_path)?).to_path_buf())
486 483 }
487 484
488 485 pub fn os_string_to_hg_path_buf<S: AsRef<OsStr>>(
489 486 os_string: S,
490 487 ) -> Result<HgPathBuf, HgPathError> {
491 488 let buf;
492 489 #[cfg(unix)]
493 490 {
494 491 use std::os::unix::ffi::OsStrExt;
495 492 buf = HgPathBuf::from_bytes(&os_string.as_ref().as_bytes());
496 493 }
497 494 // TODO Handle other platforms
498 495 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
499 496
500 497 buf.check_state()?;
501 498 Ok(buf)
502 499 }
503 500
504 501 pub fn path_to_hg_path_buf<P: AsRef<Path>>(
505 502 path: P,
506 503 ) -> Result<HgPathBuf, HgPathError> {
507 504 let buf;
508 505 let os_str = path.as_ref().as_os_str();
509 506 #[cfg(unix)]
510 507 {
511 508 use std::os::unix::ffi::OsStrExt;
512 509 buf = HgPathBuf::from_bytes(&os_str.as_bytes());
513 510 }
514 511 // TODO Handle other platforms
515 512 // TODO: convert from WTF8 to Windows MBCS (ANSI encoding).
516 513
517 514 buf.check_state()?;
518 515 Ok(buf)
519 516 }
520 517
521 518 #[cfg(test)]
522 519 mod tests {
523 520 use super::*;
524 521 use pretty_assertions::assert_eq;
525 522
526 523 #[test]
527 524 fn test_path_states() {
528 525 assert_eq!(
529 526 Err(HgPathError::LeadingSlash(b"/".to_vec())),
530 527 HgPath::new(b"/").check_state()
531 528 );
532 529 assert_eq!(
533 530 Err(HgPathError::ConsecutiveSlashes {
534 531 bytes: b"a/b//c".to_vec(),
535 532 second_slash_index: 4
536 533 }),
537 534 HgPath::new(b"a/b//c").check_state()
538 535 );
539 536 assert_eq!(
540 537 Err(HgPathError::ContainsNullByte {
541 538 bytes: b"a/b/\0c".to_vec(),
542 539 null_byte_index: 4
543 540 }),
544 541 HgPath::new(b"a/b/\0c").check_state()
545 542 );
546 543 // TODO test HgPathError::DecodeError for the Windows implementation.
547 544 assert_eq!(true, HgPath::new(b"").is_valid());
548 545 assert_eq!(true, HgPath::new(b"a/b/c").is_valid());
549 546 // Backslashes in paths are not significant, but allowed
550 547 assert_eq!(true, HgPath::new(br"a\b/c").is_valid());
551 548 // Dots in paths are not significant, but allowed
552 549 assert_eq!(true, HgPath::new(b"a/b/../c/").is_valid());
553 550 assert_eq!(true, HgPath::new(b"./a/b/../c/").is_valid());
554 551 }
555 552
    /// Exercise the `bytes()` iterator of `HgPath`: forward and backward
    /// iteration, mixed-direction consumption, use with `extend`, and
    /// reverse searches via `rposition`.
    #[test]
    fn test_iter() {
        // Single byte: once consumed from the front, both ends are empty.
        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        // Single byte consumed from the back behaves symmetrically.
        let path = HgPath::new(b"a");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        // Mixed front/back consumption meets in the middle.
        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'c'), iter.next_back());
        assert_eq!(Some(&b'b'), iter.next_back());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        // Fully consumed from the front; the back end is empty too.
        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(&b'a'), iter.next());
        assert_eq!(Some(&b'b'), iter.next());
        assert_eq!(Some(&b'c'), iter.next());
        assert_eq!(None, iter.next_back());
        assert_eq!(None, iter.next());

        // The iterator can feed `Extend` implementations.
        let path = HgPath::new(b"abc");
        let iter = path.bytes();
        let mut vec = Vec::new();
        vec.extend(iter);
        assert_eq!(vec![b'a', b'b', b'c'], vec);

        // `rposition` searches from the back and reports the found index …
        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(Some(2), iter.rposition(|c| *c == b'c'));

        // … or `None` when the predicate never matches.
        let path = HgPath::new(b"abc");
        let mut iter = path.bytes();
        assert_eq!(None, iter.rposition(|c| *c == b'd'));
    }
600 597
601 598 #[test]
602 599 fn test_join() {
603 600 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"b"));
604 601 assert_eq!(b"a/b", path.as_bytes());
605 602
606 603 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"b/c"));
607 604 assert_eq!(b"a/b/c", path.as_bytes());
608 605
609 606 // No leading slash if empty before join
610 607 let path = HgPathBuf::new().join(HgPath::new(b"b/c"));
611 608 assert_eq!(b"b/c", path.as_bytes());
612 609
613 610 // The leading slash is an invalid representation of an `HgPath`, but
614 611 // it can happen. This creates another invalid representation of
615 612 // consecutive bytes.
616 613 // TODO What should be done in this case? Should we silently remove
617 614 // the extra slash? Should we change the signature to a problematic
618 615 // `Result<HgPathBuf, HgPathError>`, or should we just keep it so and
619 616 // let the error happen upon filesystem interaction?
620 617 let path = HgPathBuf::from_bytes(b"a/").join(HgPath::new(b"/b"));
621 618 assert_eq!(b"a//b", path.as_bytes());
622 619 let path = HgPathBuf::from_bytes(b"a").join(HgPath::new(b"/b"));
623 620 assert_eq!(b"a//b", path.as_bytes());
624 621 }
625 622
    /// `HgPath::relative_to` strips `base` off the front of `self`, or
    /// returns `None` when `base` is not a byte-wise prefix. Note the
    /// matching is purely on bytes: `b"a"` does not match `b"a/b"` unless
    /// the base ends with the separator.
    #[test]
    fn test_relative_to() {
        // Everything is relative to the empty base, unchanged.
        let path = HgPath::new(b"");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        let path = HgPath::new(b"path");
        let base = HgPath::new(b"");
        assert_eq!(Some(path), path.relative_to(base));

        // Unrelated paths yield `None`.
        let path = HgPath::new(b"a");
        let base = HgPath::new(b"b");
        assert_eq!(None, path.relative_to(base));

        // The base must include the trailing separator to match.
        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a");
        assert_eq!(None, path.relative_to(base));

        let path = HgPath::new(b"a/b");
        let base = HgPath::new(b"a/");
        assert_eq!(Some(HgPath::new(b"b")), path.relative_to(base));

        let path = HgPath::new(b"nested/path/to/b");
        let base = HgPath::new(b"nested/path/");
        assert_eq!(Some(HgPath::new(b"to/b")), path.relative_to(base));

        // A trailing slash on `self` is preserved in the result.
        let path = HgPath::new(b"ends/with/dir/");
        let base = HgPath::new(b"ends/");
        assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
    }
656 653
    /// On Unix there is no drive concept: `split_drive` always returns an
    /// empty drive and leaves the path untouched, even for Windows-looking
    /// input.
    #[test]
    #[cfg(unix)]
    fn test_split_drive() {
        // Taken from the Python stdlib's tests
        assert_eq!(
            HgPath::new(br"/foo/bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"/foo/bar"))
        );
        assert_eq!(
            HgPath::new(br"foo:bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"foo:bar"))
        );
        assert_eq!(
            HgPath::new(br":foo:bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br":foo:bar"))
        );
        // Also try NT paths; should not split them
        assert_eq!(
            HgPath::new(br"c:\foo\bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"c:\foo\bar"))
        );
        assert_eq!(
            HgPath::new(b"c:/foo/bar").split_drive(),
            (HgPath::new(b""), HgPath::new(br"c:/foo/bar"))
        );
        assert_eq!(
            HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(b""),
                HgPath::new(br"\\conky\mountpoint\foo\bar")
            )
        );
    }
690 687
    /// Windows `split_drive`: drive letters (`c:`) and well-formed UNC
    /// prefixes (`\\server\mount`) are split off; malformed UNC forms
    /// yield an empty drive.
    #[test]
    #[cfg(windows)]
    fn test_split_drive() {
        assert_eq!(
            HgPath::new(br"c:\foo\bar").split_drive(),
            (HgPath::new(br"c:"), HgPath::new(br"\foo\bar"))
        );
        assert_eq!(
            HgPath::new(b"c:/foo/bar").split_drive(),
            (HgPath::new(br"c:"), HgPath::new(br"/foo/bar"))
        );
        assert_eq!(
            HgPath::new(br"\\conky\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(br"\\conky\mountpoint"),
                HgPath::new(br"\foo\bar")
            )
        );
        assert_eq!(
            HgPath::new(br"//conky/mountpoint/foo/bar").split_drive(),
            (
                HgPath::new(br"//conky/mountpoint"),
                HgPath::new(br"/foo/bar")
            )
        );
        // A tripled separator is not a valid UNC prefix: no drive.
        assert_eq!(
            HgPath::new(br"\\\conky\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"\\\conky\mountpoint\foo\bar")
            )
        );
        assert_eq!(
            HgPath::new(br"///conky/mountpoint/foo/bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"///conky/mountpoint/foo/bar")
            )
        );
        // A doubled separator inside the UNC part also disqualifies it.
        assert_eq!(
            HgPath::new(br"\\conky\\mountpoint\foo\bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"\\conky\\mountpoint\foo\bar")
            )
        );
        assert_eq!(
            HgPath::new(br"//conky//mountpoint/foo/bar").split_drive(),
            (
                HgPath::new(br""),
                HgPath::new(br"//conky//mountpoint/foo/bar")
            )
        );
        // UNC part containing U+0130
        assert_eq!(
            HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT/foo/bar").split_drive(),
            (
                HgPath::new(b"//conky/MOUNTPO\xc4\xb0NT"),
                HgPath::new(br"/foo/bar")
            )
        );
    }
753 750
754 751 #[test]
755 752 fn test_parent() {
756 753 let path = HgPath::new(b"");
757 754 assert_eq!(path.parent(), path);
758 755
759 756 let path = HgPath::new(b"a");
760 757 assert_eq!(path.parent(), HgPath::new(b""));
761 758
762 759 let path = HgPath::new(b"a/b");
763 760 assert_eq!(path.parent(), HgPath::new(b"a"));
764 761
765 762 let path = HgPath::new(b"a/other/b");
766 763 assert_eq!(path.parent(), HgPath::new(b"a/other"));
767 764 }
768 765 }
@@ -1,232 +1,232 b''
1 1 // path_auditor.rs
2 2 //
3 3 // Copyright 2020
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 //
6 6 // This software may be used and distributed according to the terms of the
7 7 // GNU General Public License version 2 or any later version.
8 8
9 9 use crate::utils::{
10 10 files::lower_clean,
11 11 find_slice_in_slice,
12 12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 13 };
14 14 use std::collections::HashSet;
15 15 use std::path::{Path, PathBuf};
16 16 use std::sync::{Mutex, RwLock};
17 17
/// Ensures that a path is valid for use in the repository i.e. does not use
/// any banned components, does not traverse a symlink, etc.
#[derive(Debug, Default)]
pub struct PathAuditor {
    // Cache of paths already accepted by `audit_path`, behind a `Mutex`
    // so it can be updated through a shared `&self`.
    audited: Mutex<HashSet<HgPathBuf>>,
    // Cache of directory prefixes already checked on the filesystem;
    // read-mostly, hence an `RwLock` rather than a `Mutex`.
    audited_dirs: RwLock<HashSet<HgPathBuf>>,
    // Repository root against which relative paths are resolved.
    root: PathBuf,
}
26 26
impl PathAuditor {
    /// Build an auditor rooted at `root`; nothing is checked until
    /// `audit_path` is called.
    pub fn new(root: impl AsRef<Path>) -> Self {
        Self {
            root: root.as_ref().to_owned(),
            ..Default::default()
        }
    }

    /// Validate `path` for use inside the repository.
    ///
    /// Rejects paths that end with a separator, carry a drive prefix,
    /// start with `.hg`/`.hg.`/an empty component, contain `..`, use a
    /// Windows shortname alias of `.hg`, sit inside a nested repository,
    /// or traverse a symlink (the last two checked against the actual
    /// filesystem). Accepted paths and prefixes are cached in `audited` /
    /// `audited_dirs` so repeated calls are cheap.
    pub fn audit_path(
        &self,
        path: impl AsRef<HgPath>,
    ) -> Result<(), HgPathError> {
        // TODO windows "localpath" normalization
        let path = path.as_ref();
        if path.is_empty() {
            return Ok(());
        }
        // TODO case normalization
        if self.audited.lock().unwrap().contains(path) {
            return Ok(());
        }
        // AIX ignores "/" at end of path, others raise EISDIR.
        let last_byte = path.as_bytes()[path.len() - 1];
        if last_byte == b'/' || last_byte == b'\\' {
            return Err(HgPathError::EndsWithSlash(path.to_owned()));
        }
        let parts: Vec<_> = path
            .as_bytes()
            .split(|b| std::path::is_separator(*b as char))
            .collect();

        let first_component = lower_clean(parts[0]);
        let first_component = first_component.as_slice();
        // Drive prefixes, `.hg`-like first components and `..` anywhere are
        // all banned outright.
        if !path.split_drive().0.is_empty()
            || (first_component == b".hg"
                || first_component == b".hg."
                || first_component == b"")
            || parts.iter().any(|c| c == b"..")
        {
            return Err(HgPathError::InsideDotHg(path.to_owned()));
        }

        // Windows shortname aliases
        for part in parts.iter() {
            if part.contains(&b'~') {
                let mut split = part.splitn(2, |b| *b == b'~');
                let first =
                    split.next().unwrap().to_owned().to_ascii_uppercase();
                let last = split.next().unwrap();
                // e.g. `HG~1` / `HG8B6C~2`-style 8.3 aliases of `.hg`.
                if last.iter().all(u8::is_ascii_digit)
                    && (first == b"HG" || first == b"HG8B6C")
                {
                    return Err(HgPathError::ContainsIllegalComponent(
                        path.to_owned(),
                    ));
                }
            }
        }
        let lower_path = lower_clean(path.as_bytes());
        if find_slice_in_slice(&lower_path, b".hg").is_some() {
            // NOTE(review): `lower_parts` is split from the *original*
            // bytes, not from `lower_path`, so the per-component comparison
            // below is case-sensitive despite the lowered pre-check —
            // confirm this is intentional.
            let lower_parts: Vec<_> = path
                .as_bytes()
                .split(|b| std::path::is_separator(*b as char))
                .collect();
            for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
                // Skip component 0: a leading `.hg` was already rejected
                // above; a match deeper down means a nested repository.
                if let Some(pos) = lower_parts[1..]
                    .iter()
                    .position(|part| part == &pattern.as_slice())
                {
                    // Rebuild the path up to (and excluding) the `.hg`
                    // component: that is the nested repository's root.
                    let base = lower_parts[..=pos]
                        .iter()
                        .fold(HgPathBuf::new(), |acc, p| {
                            acc.join(HgPath::new(p))
                        });
                    return Err(HgPathError::IsInsideNestedRepo {
                        path: path.to_owned(),
                        nested_repo: base,
                    });
                }
            }
        }

        // Only the directory ancestors need filesystem checks, not the
        // final component itself.
        let parts = &parts[..parts.len().saturating_sub(1)];

        // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
        // if there's a "foo/.hg" directory. This also means we won't
        // accidentally traverse a symlink into some other filesystem (which
        // is potentially expensive to access).
        for index in 0..parts.len() {
            let prefix = &parts[..=index].join(&b'/');
            let prefix = HgPath::new(prefix);
            if self.audited_dirs.read().unwrap().contains(prefix) {
                continue;
            }
            self.check_filesystem(&prefix, &path)?;
            self.audited_dirs.write().unwrap().insert(prefix.to_owned());
        }

        self.audited.lock().unwrap().insert(path.to_owned());

        Ok(())
    }

    /// Check `prefix` (a directory ancestor of `path`) against the actual
    /// filesystem under `self.root`: it must not be a symlink, and if it
    /// is a directory it must not contain a `.hg` directory (which would
    /// make `path` part of a nested repository).
    pub fn check_filesystem(
        &self,
        prefix: impl AsRef<HgPath>,
        path: impl AsRef<HgPath>,
    ) -> Result<(), HgPathError> {
        let prefix = prefix.as_ref();
        let path = path.as_ref();
        let current_path = self.root.join(
            hg_path_to_path_buf(prefix)
                .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
        );
        match std::fs::symlink_metadata(&current_path) {
            Err(e) => {
                // EINVAL can be raised as invalid path syntax under win32.
                if e.kind() != std::io::ErrorKind::NotFound
                    && e.kind() != std::io::ErrorKind::InvalidInput
                    && e.raw_os_error() != Some(20)
                {
                    // Rust does not yet have an `ErrorKind` for
                    // `NotADirectory` (errno 20)
                    // It happens if the dirstate contains `foo/bar` and
                    // foo is not a directory
                    return Err(HgPathError::NotFsCompliant(path.to_owned()));
                }
            }
            Ok(meta) => {
                if meta.file_type().is_symlink() {
                    return Err(HgPathError::TraversesSymbolicLink {
                        path: path.to_owned(),
                        symlink: prefix.to_owned(),
                    });
                }
                if meta.file_type().is_dir()
                    && current_path.join(".hg").is_dir()
                {
                    return Err(HgPathError::IsInsideNestedRepo {
                        path: path.to_owned(),
                        nested_repo: prefix.to_owned(),
                    });
                }
            }
        };

        Ok(())
    }

    /// Boolean convenience wrapper around `audit_path`.
    pub fn check(&self, path: impl AsRef<HgPath>) -> bool {
        self.audit_path(path).is_ok()
    }
}
179 179
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::files::get_path_from_bytes;
    use crate::utils::hg_path::path_to_hg_path_buf;

    /// End-to-end checks of `PathAuditor`: `.hg` rejection, nested-repo
    /// detection, and symlink traversal detection on a real temp dir.
    #[test]
    fn test_path_auditor() {
        let auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));

        // Paths inside `.hg` are rejected outright.
        let path = HgPath::new(b".hg/00changelog.i");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::InsideDotHg(path.to_owned()))
        );
        // A `.hg` component deeper down means a nested repository.
        let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::IsInsideNestedRepo {
                path: path.to_owned(),
                nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
            })
        );

        use std::fs::{create_dir, File};
        use tempfile::tempdir;

        let base_dir = tempdir().unwrap();
        let base_dir_path = base_dir.path();
        let a = base_dir_path.join("a");
        let b = base_dir_path.join("b");
        create_dir(&a).unwrap();
        let in_a_path = a.join("in_a");
        File::create(in_a_path).unwrap();

        // TODO make portable
        std::os::unix::fs::symlink(&a, &b).unwrap();

        // skip(2) presumably strips the temp dir's leading components so the
        // path becomes relative to the auditor's `/tmp` root — TODO confirm.
        let buf = b.join("in_a").components().skip(2).collect::<PathBuf>();
        eprintln!("buf: {}", buf.display());
        let path = path_to_hg_path_buf(buf).unwrap();
        assert_eq!(
            auditor.audit_path(&path),
            Err(HgPathError::TraversesSymbolicLink {
                // Clippy (redundant_field_names): use the field-init
                // shorthand instead of `path: path`.
                path,
                symlink: path_to_hg_path_buf(
                    b.components().skip(2).collect::<PathBuf>()
                )
                .unwrap()
            })
        );
    }
}
@@ -1,179 +1,176 b''
1 1 // cindex.rs
2 2 //
3 3 // Copyright 2018 Georges Racinet <gracinet@anybox.fr>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings to use the Index defined by the parsers C extension
9 9 //!
10 10 //! Ideally, we should use an Index entirely implemented in Rust,
11 11 //! but this will take some time to get there.
12 12
13 13 use cpython::{
14 14 exc::ImportError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult,
15 15 PyTuple, Python, PythonObject,
16 16 };
17 17 use hg::revlog::{Node, RevlogIndex};
18 18 use hg::{Graph, GraphError, Revision, WORKING_DIRECTORY_REVISION};
19 19 use libc::c_int;
20 20
/// Expected version of the C ABI exposed by `mercurial.cext.parsers`;
/// checked at runtime in `Index::new`.
const REVLOG_CABI_VERSION: c_int = 2;

/// Function table handed over by the `parsers` C extension through the
/// `revlog_CAPI` capsule below; the layout must match the C side exactly.
#[repr(C)]
pub struct Revlog_CAPI {
    // ABI version stamp, compared against `REVLOG_CABI_VERSION`.
    abi_version: c_int,
    // Number of revisions in the index (C return type is signed).
    index_length:
        unsafe extern "C" fn(index: *mut revlog_capi::RawPyObject) -> c_int,
    // Pointer to the node (hash) of `rev`, or NULL when unavailable.
    index_node: unsafe extern "C" fn(
        index: *mut revlog_capi::RawPyObject,
        rev: c_int,
    ) -> *const Node,
    // Writes the two parents of `rev` into `ps`; non-zero return on error.
    index_parents: unsafe extern "C" fn(
        index: *mut revlog_capi::RawPyObject,
        rev: c_int,
        ps: *mut [c_int; 2],
    ) -> c_int,
}
38 38
39 39 py_capsule!(
40 40 from mercurial.cext.parsers import revlog_CAPI
41 41 as revlog_capi for Revlog_CAPI);
42 42
43 43 /// A `Graph` backed up by objects and functions from revlog.c
44 44 ///
45 45 /// This implementation of the `Graph` trait, relies on (pointers to)
46 46 /// - the C index object (`index` member)
47 47 /// - the `index_get_parents()` function (`parents` member)
48 48 ///
49 49 /// # Safety
50 50 ///
51 51 /// The C index itself is mutable, and this Rust exposition is **not
52 52 /// protected by the GIL**, meaning that this construct isn't safe with respect
53 53 /// to Python threads.
54 54 ///
55 55 /// All callers of this `Index` must acquire the GIL and must not release it
56 56 /// while working.
57 57 ///
58 58 /// # TODO find a solution to make it GIL safe again.
59 59 ///
60 60 /// This is non trivial, and can wait until we have a clearer picture with
61 61 /// more Rust Mercurial constructs.
62 62 ///
63 63 /// One possibility would be to a `GILProtectedIndex` wrapper enclosing
64 64 /// a `Python<'p>` marker and have it be the one implementing the
65 65 /// `Graph` trait, but this would mean the `Graph` implementor would become
66 66 /// likely to change between subsequent method invocations of the `hg-core`
67 67 /// objects (a serious change of the `hg-core` API):
68 68 /// either exposing ways to mutate the `Graph`, or making it a non persistent
69 69 /// parameter in the relevant methods that need one.
70 70 ///
71 71 /// Another possibility would be to introduce an abstract lock handle into
72 72 /// the core API, that would be tied to `GILGuard` / `Python<'p>`
73 73 /// in the case of the `cpython` crate bindings yet could leave room for other
74 74 /// mechanisms in other contexts.
pub struct Index {
    // The CPython index object from revlog.c (exposed via `inner()`).
    index: PyObject,
    // Function table retrieved from the `parsers` capsule; the `'static`
    // lifetime comes from `revlog_capi::retrieve`.
    capi: &'static Revlog_CAPI,
}
79 79
impl Index {
    /// Wrap the C index `PyObject`, first checking that the C ABI version
    /// of the loaded `parsers` extension matches what this crate expects.
    pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
        // Trusts the capsule to contain a `Revlog_CAPI` with the declared
        // layout; a version mismatch is reported as an `ImportError`.
        let capi = unsafe { revlog_capi::retrieve(py)? };
        if capi.abi_version != REVLOG_CABI_VERSION {
            return Err(PyErr::new::<ImportError, _>(
                py,
                format!(
                    "ABI version mismatch: the C ABI revlog version {} \
                     does not match the {} expected by Rust hg-cpython",
                    capi.abi_version, REVLOG_CABI_VERSION
                ),
            ));
        }
        Ok(Index { index, capi })
    }

    /// return a reference to the CPython Index object in this Struct
    pub fn inner(&self) -> &PyObject {
        &self.index
    }

    /// Forward an `append(tup)` call to the underlying CPython index.
    pub fn append(&mut self, py: Python, tup: PyTuple) -> PyResult<PyObject> {
        self.index.call_method(
            py,
            "append",
            PyTuple::new(py, &[tup.into_object()]),
            None,
        )
    }
}
113 110
114 111 impl Clone for Index {
115 112 fn clone(&self) -> Self {
116 113 let guard = Python::acquire_gil();
117 114 Index {
118 115 index: self.index.clone_ref(guard.python()),
119 116 capi: self.capi,
120 117 }
121 118 }
122 119 }
123 120
124 121 impl PyClone for Index {
125 122 fn clone_ref(&self, py: Python) -> Self {
126 123 Index {
127 124 index: self.index.clone_ref(py),
128 125 capi: self.capi,
129 126 }
130 127 }
131 128 }
132 129
impl Graph for Index {
    /// wrap a call to the C extern parents function
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        // The C API has no notion of the working-directory pseudo-revision.
        if rev == WORKING_DIRECTORY_REVISION {
            return Err(GraphError::WorkingDirectoryUnsupported);
        }
        let mut res: [c_int; 2] = [0; 2];
        // `res` is a valid out-pointer for the duration of the call; the
        // C side fills it with the two parent revisions.
        let code = unsafe {
            (self.capi.index_parents)(
                self.index.as_ptr(),
                rev as c_int,
                &mut res as *mut [c_int; 2],
            )
        };
        // Zero is success; any non-zero code is mapped to "out of range".
        match code {
            0 => Ok(res),
            _ => Err(GraphError::ParentOutOfRange(rev)),
        }
    }
}
153 150
impl RevlogIndex for Index {
    /// Note C return type is Py_ssize_t (hence signed), but we shall
    /// force it to unsigned, because it's a length
    fn len(&self) -> usize {
        unsafe { (self.capi.index_length)(self.index.as_ptr()) as usize }
    }

    /// Return the node (hash) of `rev`, or `None` when the C layer
    /// returns a NULL pointer.
    fn node(&self, rev: Revision) -> Option<&Node> {
        let raw = unsafe {
            (self.capi.index_node)(self.index.as_ptr(), rev as c_int)
        };
        if raw.is_null() {
            None
        } else {
            // TODO it would be much better for the C layer to give us
            // a length, since the hash length will change in the near
            // future, but that's probably out of scope for the nodemap
            // patch series.
            //
            // The root of that unsafety relies in the signature of
            // `capi.index_node()` itself: returning a `Node` pointer
            // whereas it's a `char *` in the C counterpart.
            Some(unsafe { &*raw })
        }
    }
}
@@ -1,118 +1,118 b''
1 1 // copymap.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
9 9 //! `hg-core` package.
10 10
11 11 use cpython::{
12 12 PyBytes, PyClone, PyDict, PyObject, PyResult, Python, UnsafePyLeaked,
13 13 };
14 14 use std::cell::RefCell;
15 15
16 16 use crate::dirstate::dirstate_map::DirstateMap;
17 17 use hg::{utils::hg_path::HgPathBuf, CopyMapIter};
18 18
py_class!(pub class CopyMap |py| {
    data dirstate_map: DirstateMap;

    // Every operation delegates to the shared `DirstateMap`, which owns
    // the actual copy map (see the TODO in dirstate_map.rs about this
    // reference-sharing setup).
    def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
        (*self.dirstate_map(py)).copymapgetitem(py, key)
    }

    def __len__(&self) -> PyResult<usize> {
        self.dirstate_map(py).copymaplen(py)
    }

    def __contains__(&self, key: PyObject) -> PyResult<bool> {
        self.dirstate_map(py).copymapcontains(py, key)
    }

    def get(
        &self,
        key: PyObject,
        default: Option<PyObject> = None
    ) -> PyResult<Option<PyObject>> {
        self.dirstate_map(py).copymapget(py, key, default)
    }

    def pop(
        &self,
        key: PyObject,
        default: Option<PyObject> = None
    ) -> PyResult<Option<PyObject>> {
        self.dirstate_map(py).copymappop(py, key, default)
    }

    def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
        self.dirstate_map(py).copymapiter(py)
    }

    // Python's `dict()` builtin works with either a subclass of dict
    // or an abstract mapping. Said mapping needs to implement `__getitem__`
    // and `keys`.
    def keys(&self) -> PyResult<CopyMapKeysIterator> {
        self.dirstate_map(py).copymapiter(py)
    }

    def items(&self) -> PyResult<CopyMapItemsIterator> {
        self.dirstate_map(py).copymapitemsiter(py)
    }

    // Same implementation as `items`, kept under the historical
    // `iteritems` name.
    def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
        self.dirstate_map(py).copymapitemsiter(py)
    }

    def __setitem__(
        &self,
        key: PyObject,
        item: PyObject
    ) -> PyResult<()> {
        self.dirstate_map(py).copymapsetitem(py, key, item)?;
        Ok(())
    }

    def copy(&self) -> PyResult<PyDict> {
        self.dirstate_map(py).copymapcopy(py)
    }

});
83 83
84 84 impl CopyMap {
85 85 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
86 86 Self::create_instance(py, dm)
87 87 }
88 88 fn translate_key(
89 89 py: Python,
90 90 res: (&HgPathBuf, &HgPathBuf),
91 91 ) -> PyResult<Option<PyBytes>> {
92 Ok(Some(PyBytes::new(py, res.0.as_ref())))
92 Ok(Some(PyBytes::new(py, res.0.as_bytes())))
93 93 }
94 94 fn translate_key_value(
95 95 py: Python,
96 96 res: (&HgPathBuf, &HgPathBuf),
97 97 ) -> PyResult<Option<(PyBytes, PyBytes)>> {
98 98 let (k, v) = res;
99 99 Ok(Some((
100 PyBytes::new(py, k.as_ref()),
101 PyBytes::new(py, v.as_ref()),
100 PyBytes::new(py, k.as_bytes()),
101 PyBytes::new(py, v.as_bytes()),
102 102 )))
103 103 }
104 104 }
105 105
106 106 py_shared_iterator!(
107 107 CopyMapKeysIterator,
108 108 UnsafePyLeaked<CopyMapIter<'static>>,
109 109 CopyMap::translate_key,
110 110 Option<PyBytes>
111 111 );
112 112
113 113 py_shared_iterator!(
114 114 CopyMapItemsIterator,
115 115 UnsafePyLeaked<CopyMapIter<'static>>,
116 116 CopyMap::translate_key_value,
117 117 Option<(PyBytes, PyBytes)>
118 118 );
@@ -1,140 +1,140 b''
1 1 // dirs_multiset.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
9 9 //! `hg-core` package.
10 10
11 11 use std::cell::RefCell;
12 12 use std::convert::TryInto;
13 13
14 14 use cpython::{
15 15 exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
16 16 Python, UnsafePyLeaked,
17 17 };
18 18
19 19 use crate::dirstate::extract_dirstate;
20 20 use hg::{
21 21 utils::hg_path::{HgPath, HgPathBuf},
22 22 DirsMultiset, DirsMultisetIter, DirstateMapError, DirstateParseError,
23 23 EntryState,
24 24 };
25 25
py_class!(pub class Dirs |py| {
    @shared data inner: DirsMultiset;

    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
    // a `list`)
    def __new__(
        _cls,
        map: PyObject,
        skip: Option<PyObject> = None
    ) -> PyResult<Self> {
        // First byte of `skip`, if given, is parsed as an `EntryState` and
        // forwarded to `from_dirstate` as the state to skip.
        let mut skip_state: Option<EntryState> = None;
        if let Some(skip) = skip {
            skip_state = Some(
                skip.extract::<PyBytes>(py)?.data(py)[0]
                    .try_into()
                    .map_err(|e: DirstateParseError| {
                        PyErr::new::<exc::ValueError, _>(py, e.to_string())
                    })?,
            );
        }
        let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
            // Dict input: interpret as a dirstate mapping.
            let dirstate = extract_dirstate(py, &map)?;
            DirsMultiset::from_dirstate(&dirstate, skip_state)
                .map_err(|e| {
                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
                })?
        } else {
            // Flat iterable of byte paths: interpret as a manifest.
            let map: Result<Vec<HgPathBuf>, PyErr> = map
                .iter(py)?
                .map(|o| {
                    Ok(HgPathBuf::from_bytes(
                        o?.extract::<PyBytes>(py)?.data(py),
                    ))
                })
                .collect();
            DirsMultiset::from_manifest(&map?)
                .map_err(|e| {
                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
                })?
        };

        Self::create_instance(py, inner)
    }

    // Register `path` in the multiset. `EmptyPath` is silently ignored
    // (returns None); any other error becomes a Python `ValueError`.
    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
        self.inner(py).borrow_mut().add_path(
            HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
        ).and(Ok(py.None())).or_else(|e| {
            match e {
                DirstateMapError::EmptyPath => {
                    Ok(py.None())
                },
                e => {
                    Err(PyErr::new::<exc::ValueError, _>(
                        py,
                        e.to_string(),
                    ))
                }
            }
        })
    }

    // Mirror image of `addpath`, for removal.
    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
        self.inner(py).borrow_mut().delete_path(
            HgPath::new(path.extract::<PyBytes>(py)?.data(py)),
        )
        .and(Ok(py.None()))
        .or_else(|e| {
            match e {
                DirstateMapError::EmptyPath => {
                    Ok(py.None())
                },
                e => {
                    Err(PyErr::new::<exc::ValueError, _>(
                        py,
                        e.to_string(),
                    ))
                }
            }
        })
    }
    // Iteration leaks an immutable borrow into the iterator object; the
    // `py_shared_iterator!`-generated type manages its lifetime.
    def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
        let leaked_ref = self.inner(py).leak_immutable();
        DirsMultisetKeysIterator::from_inner(
            py,
            unsafe { leaked_ref.map(py, |o| o.iter()) },
        )
    }

    def __contains__(&self, item: PyObject) -> PyResult<bool> {
        Ok(self.inner(py).borrow().contains(HgPath::new(
            item.extract::<PyBytes>(py)?.data(py).as_ref(),
        )))
    }
});
121 121
122 122 impl Dirs {
123 123 pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
124 124 Self::create_instance(py, d)
125 125 }
126 126
127 127 fn translate_key(
128 128 py: Python,
129 129 res: &HgPathBuf,
130 130 ) -> PyResult<Option<PyBytes>> {
131 Ok(Some(PyBytes::new(py, res.as_ref())))
131 Ok(Some(PyBytes::new(py, res.as_bytes())))
132 132 }
133 133 }
134 134
135 135 py_shared_iterator!(
136 136 DirsMultisetKeysIterator,
137 137 UnsafePyLeaked<DirsMultisetIter<'static>>,
138 138 Dirs::translate_key,
139 139 Option<PyBytes>
140 140 );
@@ -1,586 +1,590 b''
1 1 // dirstate_map.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate::dirstate_map` file provided by the
9 9 //! `hg-core` package.
10 10
11 11 use std::cell::{Ref, RefCell};
12 12 use std::convert::TryInto;
13 13 use std::time::Duration;
14 14
15 15 use cpython::{
16 16 exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyList,
17 17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 18 UnsafePyLeaked,
19 19 };
20 20
21 21 use crate::{
22 22 dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
23 23 dirstate::non_normal_entries::{
24 24 NonNormalEntries, NonNormalEntriesIterator,
25 25 },
26 26 dirstate::{dirs_multiset::Dirs, make_dirstate_tuple},
27 27 };
28 28 use hg::{
29 29 utils::hg_path::{HgPath, HgPathBuf},
30 30 DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
31 31 DirstateMapError, DirstateParents, DirstateParseError, EntryState,
32 32 StateMapIter, PARENT_SIZE,
33 33 };
34 34
35 35 // TODO
36 36 // This object needs to share references to multiple members of its Rust
37 37 // inner struct, namely `copy_map`, `dirs` and `all_dirs`.
38 38 // Right now `CopyMap` is done, but it needs to have an explicit reference
39 39 // to `RustDirstateMap` which itself needs to have an encapsulation for
40 40 // every method in `CopyMap` (copymapcopy, etc.).
41 41 // This is ugly and hard to maintain.
42 42 // The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
43 43 // `py_class!` is already implemented and does not mention
44 44 // `RustDirstateMap`, rightfully so.
45 45 // All attributes also have to have a separate refcount data attribute for
46 46 // leaks, with all methods that go along for reference sharing.
47 47 py_class!(pub class DirstateMap |py| {
48 48 @shared data inner: RustDirstateMap;
49 49
50 50 def __new__(_cls, _root: PyObject) -> PyResult<Self> {
51 51 let inner = RustDirstateMap::default();
52 52 Self::create_instance(py, inner)
53 53 }
54 54
55 55 def clear(&self) -> PyResult<PyObject> {
56 56 self.inner(py).borrow_mut().clear();
57 57 Ok(py.None())
58 58 }
59 59
60 60 def get(
61 61 &self,
62 62 key: PyObject,
63 63 default: Option<PyObject> = None
64 64 ) -> PyResult<Option<PyObject>> {
65 65 let key = key.extract::<PyBytes>(py)?;
66 66 match self.inner(py).borrow().get(HgPath::new(key.data(py))) {
67 67 Some(entry) => {
68 68 Ok(Some(make_dirstate_tuple(py, entry)?))
69 69 },
70 70 None => Ok(default)
71 71 }
72 72 }
73 73
74 74 def addfile(
75 75 &self,
76 76 f: PyObject,
77 77 oldstate: PyObject,
78 78 state: PyObject,
79 79 mode: PyObject,
80 80 size: PyObject,
81 81 mtime: PyObject
82 82 ) -> PyResult<PyObject> {
83 83 self.inner(py).borrow_mut().add_file(
84 84 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
85 85 oldstate.extract::<PyBytes>(py)?.data(py)[0]
86 86 .try_into()
87 87 .map_err(|e: DirstateParseError| {
88 88 PyErr::new::<exc::ValueError, _>(py, e.to_string())
89 89 })?,
90 90 DirstateEntry {
91 91 state: state.extract::<PyBytes>(py)?.data(py)[0]
92 92 .try_into()
93 93 .map_err(|e: DirstateParseError| {
94 94 PyErr::new::<exc::ValueError, _>(py, e.to_string())
95 95 })?,
96 96 mode: mode.extract(py)?,
97 97 size: size.extract(py)?,
98 98 mtime: mtime.extract(py)?,
99 99 },
100 100 ).and(Ok(py.None())).or_else(|e: DirstateMapError| {
101 101 Err(PyErr::new::<exc::ValueError, _>(py, e.to_string()))
102 102 })
103 103 }
104 104
105 105 def removefile(
106 106 &self,
107 107 f: PyObject,
108 108 oldstate: PyObject,
109 109 size: PyObject
110 110 ) -> PyResult<PyObject> {
111 111 self.inner(py).borrow_mut()
112 112 .remove_file(
113 113 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
114 114 oldstate.extract::<PyBytes>(py)?.data(py)[0]
115 115 .try_into()
116 116 .map_err(|e: DirstateParseError| {
117 117 PyErr::new::<exc::ValueError, _>(py, e.to_string())
118 118 })?,
119 119 size.extract(py)?,
120 120 )
121 121 .or_else(|_| {
122 122 Err(PyErr::new::<exc::OSError, _>(
123 123 py,
124 124 "Dirstate error".to_string(),
125 125 ))
126 126 })?;
127 127 Ok(py.None())
128 128 }
129 129
130 130 def dropfile(
131 131 &self,
132 132 f: PyObject,
133 133 oldstate: PyObject
134 134 ) -> PyResult<PyBool> {
135 135 self.inner(py).borrow_mut()
136 136 .drop_file(
137 137 HgPath::new(f.extract::<PyBytes>(py)?.data(py)),
138 138 oldstate.extract::<PyBytes>(py)?.data(py)[0]
139 139 .try_into()
140 140 .map_err(|e: DirstateParseError| {
141 141 PyErr::new::<exc::ValueError, _>(py, e.to_string())
142 142 })?,
143 143 )
144 144 .and_then(|b| Ok(b.to_py_object(py)))
145 145 .or_else(|_| {
146 146 Err(PyErr::new::<exc::OSError, _>(
147 147 py,
148 148 "Dirstate error".to_string(),
149 149 ))
150 150 })
151 151 }
152 152
153 153 def clearambiguoustimes(
154 154 &self,
155 155 files: PyObject,
156 156 now: PyObject
157 157 ) -> PyResult<PyObject> {
158 158 let files: PyResult<Vec<HgPathBuf>> = files
159 159 .iter(py)?
160 160 .map(|filename| {
161 161 Ok(HgPathBuf::from_bytes(
162 162 filename?.extract::<PyBytes>(py)?.data(py),
163 163 ))
164 164 })
165 165 .collect();
166 166 self.inner(py).borrow_mut()
167 167 .clear_ambiguous_times(files?, now.extract(py)?);
168 168 Ok(py.None())
169 169 }
170 170
171 171 def other_parent_entries(&self) -> PyResult<PyObject> {
172 172 let mut inner_shared = self.inner(py).borrow_mut();
173 173 let (_, other_parent) =
174 174 inner_shared.get_non_normal_other_parent_entries();
175 175
176 176 let locals = PyDict::new(py);
177 177 locals.set_item(
178 178 py,
179 179 "other_parent",
180 180 other_parent
181 181 .iter()
182 .map(|v| PyBytes::new(py, v.as_ref()))
182 .map(|v| PyBytes::new(py, v.as_bytes()))
183 183 .collect::<Vec<PyBytes>>()
184 184 .to_py_object(py),
185 185 )?;
186 186
187 187 py.eval("set(other_parent)", None, Some(&locals))
188 188 }
189 189
190 190 def non_normal_entries(&self) -> PyResult<NonNormalEntries> {
191 191 NonNormalEntries::from_inner(py, self.clone_ref(py))
192 192 }
193 193
194 194 def non_normal_entries_contains(&self, key: PyObject) -> PyResult<bool> {
195 195 let key = key.extract::<PyBytes>(py)?;
196 196 Ok(self
197 197 .inner(py)
198 198 .borrow_mut()
199 199 .get_non_normal_other_parent_entries().0
200 200 .contains(HgPath::new(key.data(py))))
201 201 }
202 202
203 203 def non_normal_entries_display(&self) -> PyResult<PyString> {
204 204 Ok(
205 205 PyString::new(
206 206 py,
207 207 &format!(
208 208 "NonNormalEntries: {:?}",
209 209 self
210 210 .inner(py)
211 211 .borrow_mut()
212 212 .get_non_normal_other_parent_entries().0
213 213 .iter().map(|o| o))
214 214 )
215 215 )
216 216 }
217 217
218 218 def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
219 219 let key = key.extract::<PyBytes>(py)?;
220 220 self
221 221 .inner(py)
222 222 .borrow_mut()
223 223 .non_normal_entries_remove(HgPath::new(key.data(py)));
224 224 Ok(py.None())
225 225 }
226 226
227 227 def non_normal_entries_union(&self, other: PyObject) -> PyResult<PyList> {
228 228 let other: PyResult<_> = other.iter(py)?
229 229 .map(|f| {
230 230 Ok(HgPathBuf::from_bytes(
231 231 f?.extract::<PyBytes>(py)?.data(py),
232 232 ))
233 233 })
234 234 .collect();
235 235
236 236 let res = self
237 237 .inner(py)
238 238 .borrow_mut()
239 239 .non_normal_entries_union(other?);
240 240
241 241 let ret = PyList::new(py, &[]);
242 242 for filename in res.iter() {
243 243 let as_pystring = PyBytes::new(py, filename.as_bytes());
244 244 ret.append(py, as_pystring.into_object());
245 245 }
246 246 Ok(ret)
247 247 }
248 248
249 249 def non_normal_entries_iter(&self) -> PyResult<NonNormalEntriesIterator> {
250 250 // Make sure the sets are defined before we no longer have a mutable
251 251 // reference to the dmap.
252 252 self.inner(py)
253 253 .borrow_mut()
254 254 .set_non_normal_other_parent_entries(false);
255 255
256 256 let leaked_ref = self.inner(py).leak_immutable();
257 257
258 258 NonNormalEntriesIterator::from_inner(py, unsafe {
259 259 leaked_ref.map(py, |o| {
260 260 o.get_non_normal_other_parent_entries_panic().0.iter()
261 261 })
262 262 })
263 263 }
264 264
265 265 def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
266 266 let d = d.extract::<PyBytes>(py)?;
267 267 Ok(self.inner(py).borrow_mut()
268 268 .has_tracked_dir(HgPath::new(d.data(py)))
269 269 .map_err(|e| {
270 270 PyErr::new::<exc::ValueError, _>(py, e.to_string())
271 271 })?
272 272 .to_py_object(py))
273 273 }
274 274
275 275 def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
276 276 let d = d.extract::<PyBytes>(py)?;
277 277 Ok(self.inner(py).borrow_mut()
278 278 .has_dir(HgPath::new(d.data(py)))
279 279 .map_err(|e| {
280 280 PyErr::new::<exc::ValueError, _>(py, e.to_string())
281 281 })?
282 282 .to_py_object(py))
283 283 }
284 284
285 285 def parents(&self, st: PyObject) -> PyResult<PyTuple> {
286 286 self.inner(py).borrow_mut()
287 287 .parents(st.extract::<PyBytes>(py)?.data(py))
288 288 .and_then(|d| {
289 289 Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
290 290 .to_py_object(py))
291 291 })
292 292 .or_else(|_| {
293 293 Err(PyErr::new::<exc::OSError, _>(
294 294 py,
295 295 "Dirstate error".to_string(),
296 296 ))
297 297 })
298 298 }
299 299
300 300 def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
301 301 let p1 = extract_node_id(py, &p1)?;
302 302 let p2 = extract_node_id(py, &p2)?;
303 303
304 304 self.inner(py).borrow_mut()
305 305 .set_parents(&DirstateParents { p1, p2 });
306 306 Ok(py.None())
307 307 }
308 308
309 309 def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
310 310 match self.inner(py).borrow_mut()
311 311 .read(st.extract::<PyBytes>(py)?.data(py))
312 312 {
313 313 Ok(Some(parents)) => Ok(Some(
314 314 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
315 315 .to_py_object(py)
316 316 .into_object(),
317 317 )),
318 318 Ok(None) => Ok(Some(py.None())),
319 319 Err(_) => Err(PyErr::new::<exc::OSError, _>(
320 320 py,
321 321 "Dirstate error".to_string(),
322 322 )),
323 323 }
324 324 }
325 325 def write(
326 326 &self,
327 327 p1: PyObject,
328 328 p2: PyObject,
329 329 now: PyObject
330 330 ) -> PyResult<PyBytes> {
331 331 let now = Duration::new(now.extract(py)?, 0);
332 332 let parents = DirstateParents {
333 333 p1: extract_node_id(py, &p1)?,
334 334 p2: extract_node_id(py, &p2)?,
335 335 };
336 336
337 337 match self.inner(py).borrow_mut().pack(parents, now) {
338 338 Ok(packed) => Ok(PyBytes::new(py, &packed)),
339 339 Err(_) => Err(PyErr::new::<exc::OSError, _>(
340 340 py,
341 341 "Dirstate error".to_string(),
342 342 )),
343 343 }
344 344 }
345 345
346 346 def filefoldmapasdict(&self) -> PyResult<PyDict> {
347 347 let dict = PyDict::new(py);
348 348 for (key, value) in
349 349 self.inner(py).borrow_mut().build_file_fold_map().iter()
350 350 {
351 dict.set_item(py, key.as_ref().to_vec(), value.as_ref().to_vec())?;
351 dict.set_item(
352 py,
353 key.as_bytes().to_vec(),
354 value.as_bytes().to_vec(),
355 )?;
352 356 }
353 357 Ok(dict)
354 358 }
355 359
356 360 def __len__(&self) -> PyResult<usize> {
357 361 Ok(self.inner(py).borrow().len())
358 362 }
359 363
360 364 def __contains__(&self, key: PyObject) -> PyResult<bool> {
361 365 let key = key.extract::<PyBytes>(py)?;
362 366 Ok(self.inner(py).borrow().contains_key(HgPath::new(key.data(py))))
363 367 }
364 368
365 369 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
366 370 let key = key.extract::<PyBytes>(py)?;
367 371 let key = HgPath::new(key.data(py));
368 372 match self.inner(py).borrow().get(key) {
369 373 Some(entry) => {
370 374 Ok(make_dirstate_tuple(py, entry)?)
371 375 },
372 376 None => Err(PyErr::new::<exc::KeyError, _>(
373 377 py,
374 378 String::from_utf8_lossy(key.as_bytes()),
375 379 )),
376 380 }
377 381 }
378 382
379 383 def keys(&self) -> PyResult<DirstateMapKeysIterator> {
380 384 let leaked_ref = self.inner(py).leak_immutable();
381 385 DirstateMapKeysIterator::from_inner(
382 386 py,
383 387 unsafe { leaked_ref.map(py, |o| o.iter()) },
384 388 )
385 389 }
386 390
387 391 def items(&self) -> PyResult<DirstateMapItemsIterator> {
388 392 let leaked_ref = self.inner(py).leak_immutable();
389 393 DirstateMapItemsIterator::from_inner(
390 394 py,
391 395 unsafe { leaked_ref.map(py, |o| o.iter()) },
392 396 )
393 397 }
394 398
395 399 def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
396 400 let leaked_ref = self.inner(py).leak_immutable();
397 401 DirstateMapKeysIterator::from_inner(
398 402 py,
399 403 unsafe { leaked_ref.map(py, |o| o.iter()) },
400 404 )
401 405 }
402 406
403 407 def getdirs(&self) -> PyResult<Dirs> {
404 408 // TODO don't copy, share the reference
405 409 self.inner(py).borrow_mut().set_dirs()
406 410 .map_err(|e| {
407 411 PyErr::new::<exc::ValueError, _>(py, e.to_string())
408 412 })?;
409 413 Dirs::from_inner(
410 414 py,
411 415 DirsMultiset::from_dirstate(
412 416 &self.inner(py).borrow(),
413 417 Some(EntryState::Removed),
414 418 )
415 419 .map_err(|e| {
416 420 PyErr::new::<exc::ValueError, _>(py, e.to_string())
417 421 })?,
418 422 )
419 423 }
420 424 def getalldirs(&self) -> PyResult<Dirs> {
421 425 // TODO don't copy, share the reference
422 426 self.inner(py).borrow_mut().set_all_dirs()
423 427 .map_err(|e| {
424 428 PyErr::new::<exc::ValueError, _>(py, e.to_string())
425 429 })?;
426 430 Dirs::from_inner(
427 431 py,
428 432 DirsMultiset::from_dirstate(
429 433 &self.inner(py).borrow(),
430 434 None,
431 435 ).map_err(|e| {
432 436 PyErr::new::<exc::ValueError, _>(py, e.to_string())
433 437 })?,
434 438 )
435 439 }
436 440
437 441 // TODO all copymap* methods, see docstring above
438 442 def copymapcopy(&self) -> PyResult<PyDict> {
439 443 let dict = PyDict::new(py);
440 444 for (key, value) in self.inner(py).borrow().copy_map.iter() {
441 445 dict.set_item(
442 446 py,
443 PyBytes::new(py, key.as_ref()),
444 PyBytes::new(py, value.as_ref()),
447 PyBytes::new(py, key.as_bytes()),
448 PyBytes::new(py, value.as_bytes()),
445 449 )?;
446 450 }
447 451 Ok(dict)
448 452 }
449 453
450 454 def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
451 455 let key = key.extract::<PyBytes>(py)?;
452 456 match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) {
453 Some(copy) => Ok(PyBytes::new(py, copy.as_ref())),
457 Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())),
454 458 None => Err(PyErr::new::<exc::KeyError, _>(
455 459 py,
456 460 String::from_utf8_lossy(key.data(py)),
457 461 )),
458 462 }
459 463 }
460 464 def copymap(&self) -> PyResult<CopyMap> {
461 465 CopyMap::from_inner(py, self.clone_ref(py))
462 466 }
463 467
464 468 def copymaplen(&self) -> PyResult<usize> {
465 469 Ok(self.inner(py).borrow().copy_map.len())
466 470 }
467 471 def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
468 472 let key = key.extract::<PyBytes>(py)?;
469 473 Ok(self
470 474 .inner(py)
471 475 .borrow()
472 476 .copy_map
473 477 .contains_key(HgPath::new(key.data(py))))
474 478 }
475 479 def copymapget(
476 480 &self,
477 481 key: PyObject,
478 482 default: Option<PyObject>
479 483 ) -> PyResult<Option<PyObject>> {
480 484 let key = key.extract::<PyBytes>(py)?;
481 485 match self
482 486 .inner(py)
483 487 .borrow()
484 488 .copy_map
485 489 .get(HgPath::new(key.data(py)))
486 490 {
487 491 Some(copy) => Ok(Some(
488 PyBytes::new(py, copy.as_ref()).into_object(),
492 PyBytes::new(py, copy.as_bytes()).into_object(),
489 493 )),
490 494 None => Ok(default),
491 495 }
492 496 }
493 497 def copymapsetitem(
494 498 &self,
495 499 key: PyObject,
496 500 value: PyObject
497 501 ) -> PyResult<PyObject> {
498 502 let key = key.extract::<PyBytes>(py)?;
499 503 let value = value.extract::<PyBytes>(py)?;
500 504 self.inner(py).borrow_mut().copy_map.insert(
501 505 HgPathBuf::from_bytes(key.data(py)),
502 506 HgPathBuf::from_bytes(value.data(py)),
503 507 );
504 508 Ok(py.None())
505 509 }
506 510 def copymappop(
507 511 &self,
508 512 key: PyObject,
509 513 default: Option<PyObject>
510 514 ) -> PyResult<Option<PyObject>> {
511 515 let key = key.extract::<PyBytes>(py)?;
512 516 match self
513 517 .inner(py)
514 518 .borrow_mut()
515 519 .copy_map
516 520 .remove(HgPath::new(key.data(py)))
517 521 {
518 522 Some(_) => Ok(None),
519 523 None => Ok(default),
520 524 }
521 525 }
522 526
523 527 def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
524 528 let leaked_ref = self.inner(py).leak_immutable();
525 529 CopyMapKeysIterator::from_inner(
526 530 py,
527 531 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
528 532 )
529 533 }
530 534
531 535 def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
532 536 let leaked_ref = self.inner(py).leak_immutable();
533 537 CopyMapItemsIterator::from_inner(
534 538 py,
535 539 unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) },
536 540 )
537 541 }
538 542
539 543 });
540 544
541 545 impl DirstateMap {
542 546 pub fn get_inner<'a>(
543 547 &'a self,
544 548 py: Python<'a>,
545 549 ) -> Ref<'a, RustDirstateMap> {
546 550 self.inner(py).borrow()
547 551 }
548 552 fn translate_key(
549 553 py: Python,
550 554 res: (&HgPathBuf, &DirstateEntry),
551 555 ) -> PyResult<Option<PyBytes>> {
552 Ok(Some(PyBytes::new(py, res.0.as_ref())))
556 Ok(Some(PyBytes::new(py, res.0.as_bytes())))
553 557 }
554 558 fn translate_key_value(
555 559 py: Python,
556 560 res: (&HgPathBuf, &DirstateEntry),
557 561 ) -> PyResult<Option<(PyBytes, PyObject)>> {
558 562 let (f, entry) = res;
559 563 Ok(Some((
560 PyBytes::new(py, f.as_ref()),
564 PyBytes::new(py, f.as_bytes()),
561 565 make_dirstate_tuple(py, entry)?,
562 566 )))
563 567 }
564 568 }
565 569
566 570 py_shared_iterator!(
567 571 DirstateMapKeysIterator,
568 572 UnsafePyLeaked<StateMapIter<'static>>,
569 573 DirstateMap::translate_key,
570 574 Option<PyBytes>
571 575 );
572 576
573 577 py_shared_iterator!(
574 578 DirstateMapItemsIterator,
575 579 UnsafePyLeaked<StateMapIter<'static>>,
576 580 DirstateMap::translate_key_value,
577 581 Option<(PyBytes, PyObject)>
578 582 );
579 583
580 584 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<[u8; PARENT_SIZE]> {
581 585 let bytes = obj.extract::<PyBytes>(py)?;
582 586 match bytes.data(py).try_into() {
583 587 Ok(s) => Ok(s),
584 588 Err(e) => Err(PyErr::new::<exc::ValueError, _>(py, e.to_string())),
585 589 }
586 590 }
@@ -1,76 +1,76 b''
1 1 // non_normal_other_parent_entries.rs
2 2 //
3 3 // Copyright 2020 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 use cpython::{
9 9 exc::NotImplementedError, CompareOp, ObjectProtocol, PyBytes, PyClone,
10 10 PyErr, PyList, PyObject, PyResult, PyString, Python, PythonObject,
11 11 ToPyObject, UnsafePyLeaked,
12 12 };
13 13
14 14 use crate::dirstate::DirstateMap;
15 15 use hg::utils::hg_path::HgPathBuf;
16 16 use std::cell::RefCell;
17 17 use std::collections::hash_set;
18 18
19 19 py_class!(pub class NonNormalEntries |py| {
20 20 data dmap: DirstateMap;
21 21
22 22 def __contains__(&self, key: PyObject) -> PyResult<bool> {
23 23 self.dmap(py).non_normal_entries_contains(py, key)
24 24 }
25 25 def remove(&self, key: PyObject) -> PyResult<PyObject> {
26 26 self.dmap(py).non_normal_entries_remove(py, key)
27 27 }
28 28 def union(&self, other: PyObject) -> PyResult<PyList> {
29 29 self.dmap(py).non_normal_entries_union(py, other)
30 30 }
31 31 def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> {
32 32 match op {
33 33 CompareOp::Eq => self.is_equal_to(py, other),
34 34 CompareOp::Ne => Ok(!self.is_equal_to(py, other)?),
35 35 _ => Err(PyErr::new::<NotImplementedError, _>(py, ""))
36 36 }
37 37 }
38 38 def __repr__(&self) -> PyResult<PyString> {
39 39 self.dmap(py).non_normal_entries_display(py)
40 40 }
41 41
42 42 def __iter__(&self) -> PyResult<NonNormalEntriesIterator> {
43 43 self.dmap(py).non_normal_entries_iter(py)
44 44 }
45 45 });
46 46
47 47 impl NonNormalEntries {
48 48 pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
49 49 Self::create_instance(py, dm)
50 50 }
51 51
52 52 fn is_equal_to(&self, py: Python, other: PyObject) -> PyResult<bool> {
53 53 for item in other.iter(py)? {
54 54 if !self.dmap(py).non_normal_entries_contains(py, item?)? {
55 55 return Ok(false);
56 56 }
57 57 }
58 58 Ok(true)
59 59 }
60 60
61 61 fn translate_key(
62 62 py: Python,
63 63 key: &HgPathBuf,
64 64 ) -> PyResult<Option<PyBytes>> {
65 Ok(Some(PyBytes::new(py, key.as_ref())))
65 Ok(Some(PyBytes::new(py, key.as_bytes())))
66 66 }
67 67 }
68 68
69 69 type NonNormalEntriesIter<'a> = hash_set::Iter<'a, HgPathBuf>;
70 70
71 71 py_shared_iterator!(
72 72 NonNormalEntriesIterator,
73 73 UnsafePyLeaked<NonNormalEntriesIter<'static>>,
74 74 NonNormalEntries::translate_key,
75 75 Option<PyBytes>
76 76 );
@@ -1,303 +1,301 b''
1 1 // status.rs
2 2 //
3 3 // Copyright 2019, Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::status` module provided by the
9 9 //! `hg-core` crate. From Python, this will be seen as
10 10 //! `rustext.dirstate.status`.
11 11
12 12 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
13 13 use cpython::{
14 14 exc::ValueError, ObjectProtocol, PyBytes, PyErr, PyList, PyObject,
15 15 PyResult, PyTuple, Python, PythonObject, ToPyObject,
16 16 };
17 17 use hg::{
18 18 matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher},
19 19 parse_pattern_syntax, status,
20 20 utils::{
21 21 files::{get_bytes_from_path, get_path_from_bytes},
22 22 hg_path::{HgPath, HgPathBuf},
23 23 },
24 24 BadMatch, DirstateStatus, IgnorePattern, PatternFileWarning, StatusError,
25 25 StatusOptions,
26 26 };
27 27 use std::borrow::{Borrow, Cow};
28 28
29 29 /// This will be useless once trait impls for collection are added to `PyBytes`
30 30 /// upstream.
31 31 fn collect_pybytes_list(
32 32 py: Python,
33 33 collection: &[impl AsRef<HgPath>],
34 34 ) -> PyList {
35 35 let list = PyList::new(py, &[]);
36 36
37 37 for path in collection.iter() {
38 38 list.append(
39 39 py,
40 40 PyBytes::new(py, path.as_ref().as_bytes()).into_object(),
41 41 )
42 42 }
43 43
44 44 list
45 45 }
46 46
47 47 fn collect_bad_matches(
48 48 py: Python,
49 49 collection: &[(impl AsRef<HgPath>, BadMatch)],
50 50 ) -> PyResult<PyList> {
51 51 let list = PyList::new(py, &[]);
52 52
53 53 let os = py.import("os")?;
54 54 let get_error_message = |code: i32| -> PyResult<_> {
55 55 os.call(
56 56 py,
57 57 "strerror",
58 58 PyTuple::new(py, &[code.to_py_object(py).into_object()]),
59 59 None,
60 60 )
61 61 };
62 62
63 63 for (path, bad_match) in collection.iter() {
64 64 let message = match bad_match {
65 65 BadMatch::OsError(code) => get_error_message(*code)?,
66 66 BadMatch::BadType(bad_type) => format!(
67 67 "unsupported file type (type is {})",
68 68 bad_type.to_string()
69 69 )
70 70 .to_py_object(py)
71 71 .into_object(),
72 72 };
73 73 list.append(
74 74 py,
75 75 (PyBytes::new(py, path.as_ref().as_bytes()), message)
76 76 .to_py_object(py)
77 77 .into_object(),
78 78 )
79 79 }
80 80
81 81 Ok(list)
82 82 }
83 83
84 84 fn handle_fallback(py: Python, err: StatusError) -> PyErr {
85 85 match err {
86 86 StatusError::Pattern(e) => {
87 87 let as_string = e.to_string();
88 88 log::trace!("Rust status fallback: `{}`", &as_string);
89 89
90 90 PyErr::new::<FallbackError, _>(py, &as_string)
91 91 }
92 92 e => PyErr::new::<ValueError, _>(py, e.to_string()),
93 93 }
94 94 }
95 95
96 96 pub fn status_wrapper(
97 97 py: Python,
98 98 dmap: DirstateMap,
99 99 matcher: PyObject,
100 100 root_dir: PyObject,
101 101 ignore_files: PyList,
102 102 check_exec: bool,
103 103 last_normal_time: i64,
104 104 list_clean: bool,
105 105 list_ignored: bool,
106 106 list_unknown: bool,
107 107 collect_traversed_dirs: bool,
108 108 ) -> PyResult<PyTuple> {
109 109 let bytes = root_dir.extract::<PyBytes>(py)?;
110 110 let root_dir = get_path_from_bytes(bytes.data(py));
111 111
112 112 let dmap: DirstateMap = dmap.to_py_object(py);
113 113 let dmap = dmap.get_inner(py);
114 114
115 115 let ignore_files: PyResult<Vec<_>> = ignore_files
116 116 .iter(py)
117 117 .map(|b| {
118 118 let file = b.extract::<PyBytes>(py)?;
119 119 Ok(get_path_from_bytes(file.data(py)).to_owned())
120 120 })
121 121 .collect();
122 122 let ignore_files = ignore_files?;
123 123
124 124 match matcher.get_type(py).name(py).borrow() {
125 125 "alwaysmatcher" => {
126 126 let matcher = AlwaysMatcher;
127 127 let ((lookup, status_res), warnings) = status(
128 128 &dmap,
129 129 &matcher,
130 130 &root_dir,
131 131 ignore_files,
132 132 StatusOptions {
133 133 check_exec,
134 134 last_normal_time,
135 135 list_clean,
136 136 list_ignored,
137 137 list_unknown,
138 138 collect_traversed_dirs,
139 139 },
140 140 )
141 141 .map_err(|e| handle_fallback(py, e))?;
142 142 build_response(py, lookup, status_res, warnings)
143 143 }
144 144 "exactmatcher" => {
145 145 let files = matcher.call_method(
146 146 py,
147 147 "files",
148 148 PyTuple::new(py, &[]),
149 149 None,
150 150 )?;
151 151 let files: PyList = files.cast_into(py)?;
152 152 let files: PyResult<Vec<HgPathBuf>> = files
153 153 .iter(py)
154 154 .map(|f| {
155 155 Ok(HgPathBuf::from_bytes(
156 156 f.extract::<PyBytes>(py)?.data(py),
157 157 ))
158 158 })
159 159 .collect();
160 160
161 161 let files = files?;
162 162 let matcher = FileMatcher::new(&files)
163 163 .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
164 164 let ((lookup, status_res), warnings) = status(
165 165 &dmap,
166 166 &matcher,
167 167 &root_dir,
168 168 ignore_files,
169 169 StatusOptions {
170 170 check_exec,
171 171 last_normal_time,
172 172 list_clean,
173 173 list_ignored,
174 174 list_unknown,
175 175 collect_traversed_dirs,
176 176 },
177 177 )
178 178 .map_err(|e| handle_fallback(py, e))?;
179 179 build_response(py, lookup, status_res, warnings)
180 180 }
181 181 "includematcher" => {
182 182 // Get the patterns from Python even though most of them are
183 183 // redundant with those we will parse later on, as they include
184 184 // those passed from the command line.
185 185 let ignore_patterns: PyResult<Vec<_>> = matcher
186 186 .getattr(py, "_kindpats")?
187 187 .iter(py)?
188 188 .map(|k| {
189 189 let k = k?;
190 190 let syntax = parse_pattern_syntax(
191 191 &[
192 192 k.get_item(py, 0)?
193 193 .extract::<PyBytes>(py)?
194 194 .data(py),
195 195 &b":"[..],
196 196 ]
197 197 .concat(),
198 198 )
199 199 .map_err(|e| {
200 200 handle_fallback(py, StatusError::Pattern(e))
201 201 })?;
202 202 let pattern = k.get_item(py, 1)?.extract::<PyBytes>(py)?;
203 203 let pattern = pattern.data(py);
204 204 let source = k.get_item(py, 2)?.extract::<PyBytes>(py)?;
205 205 let source = get_path_from_bytes(source.data(py));
206 206 let new = IgnorePattern::new(syntax, pattern, source);
207 207 Ok(new)
208 208 })
209 209 .collect();
210 210
211 211 let ignore_patterns = ignore_patterns?;
212 212 let mut all_warnings = vec![];
213 213
214 214 let (matcher, warnings) =
215 215 IncludeMatcher::new(ignore_patterns, &root_dir)
216 216 .map_err(|e| handle_fallback(py, e.into()))?;
217 217 all_warnings.extend(warnings);
218 218
219 219 let ((lookup, status_res), warnings) = status(
220 220 &dmap,
221 221 &matcher,
222 222 &root_dir,
223 223 ignore_files,
224 224 StatusOptions {
225 225 check_exec,
226 226 last_normal_time,
227 227 list_clean,
228 228 list_ignored,
229 229 list_unknown,
230 230 collect_traversed_dirs,
231 231 },
232 232 )
233 233 .map_err(|e| handle_fallback(py, e))?;
234 234
235 235 all_warnings.extend(warnings);
236 236
237 237 build_response(py, lookup, status_res, all_warnings)
238 238 }
239 e => {
240 return Err(PyErr::new::<ValueError, _>(
241 py,
242 format!("Unsupported matcher {}", e),
243 ));
244 }
239 e => Err(PyErr::new::<ValueError, _>(
240 py,
241 format!("Unsupported matcher {}", e),
242 )),
245 243 }
246 244 }
247 245
248 246 fn build_response(
249 247 py: Python,
250 248 lookup: Vec<Cow<HgPath>>,
251 249 status_res: DirstateStatus,
252 250 warnings: Vec<PatternFileWarning>,
253 251 ) -> PyResult<PyTuple> {
254 252 let modified = collect_pybytes_list(py, status_res.modified.as_ref());
255 253 let added = collect_pybytes_list(py, status_res.added.as_ref());
256 254 let removed = collect_pybytes_list(py, status_res.removed.as_ref());
257 255 let deleted = collect_pybytes_list(py, status_res.deleted.as_ref());
258 256 let clean = collect_pybytes_list(py, status_res.clean.as_ref());
259 257 let ignored = collect_pybytes_list(py, status_res.ignored.as_ref());
260 258 let unknown = collect_pybytes_list(py, status_res.unknown.as_ref());
261 259 let lookup = collect_pybytes_list(py, lookup.as_ref());
262 260 let bad = collect_bad_matches(py, status_res.bad.as_ref())?;
263 261 let traversed = collect_pybytes_list(py, status_res.traversed.as_ref());
264 262 let py_warnings = PyList::new(py, &[]);
265 263 for warning in warnings.iter() {
266 264 // We use duck-typing on the Python side for dispatch, good enough for
267 265 // now.
268 266 match warning {
269 267 PatternFileWarning::InvalidSyntax(file, syn) => {
270 268 py_warnings.append(
271 269 py,
272 270 (
273 271 PyBytes::new(py, &get_bytes_from_path(&file)),
274 272 PyBytes::new(py, syn),
275 273 )
276 274 .to_py_object(py)
277 275 .into_object(),
278 276 );
279 277 }
280 278 PatternFileWarning::NoSuchFile(file) => py_warnings.append(
281 279 py,
282 280 PyBytes::new(py, &get_bytes_from_path(&file)).into_object(),
283 281 ),
284 282 }
285 283 }
286 284
287 285 Ok(PyTuple::new(
288 286 py,
289 287 &[
290 288 lookup.into_object(),
291 289 modified.into_object(),
292 290 added.into_object(),
293 291 removed.into_object(),
294 292 deleted.into_object(),
295 293 clean.into_object(),
296 294 ignored.into_object(),
297 295 unknown.into_object(),
298 296 py_warnings.into_object(),
299 297 bad.into_object(),
300 298 traversed.into_object(),
301 299 ][..],
302 300 ))
303 301 }
@@ -1,175 +1,175 b''
1 1 // parsers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Bindings for the `hg::dirstate::parsers` module provided by the
9 9 //! `hg-core` package.
10 10 //!
11 11 //! From Python, this will be seen as `mercurial.rustext.parsers`
12 12 use cpython::{
13 13 exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
14 14 PythonObject, ToPyObject,
15 15 };
16 16 use hg::{
17 17 pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
18 18 DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
19 19 PARENT_SIZE,
20 20 };
21 21 use std::convert::TryInto;
22 22
23 23 use crate::dirstate::{extract_dirstate, make_dirstate_tuple};
24 24 use std::time::Duration;
25 25
26 26 fn parse_dirstate_wrapper(
27 27 py: Python,
28 28 dmap: PyDict,
29 29 copymap: PyDict,
30 30 st: PyBytes,
31 31 ) -> PyResult<PyTuple> {
32 32 let mut dirstate_map = FastHashMap::default();
33 33 let mut copies = FastHashMap::default();
34 34
35 35 match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
36 36 Ok(parents) => {
37 37 for (filename, entry) in &dirstate_map {
38 38 dmap.set_item(
39 39 py,
40 PyBytes::new(py, filename.as_ref()),
40 PyBytes::new(py, filename.as_bytes()),
41 41 make_dirstate_tuple(py, entry)?,
42 42 )?;
43 43 }
44 44 for (path, copy_path) in copies {
45 45 copymap.set_item(
46 46 py,
47 PyBytes::new(py, path.as_ref()),
48 PyBytes::new(py, copy_path.as_ref()),
47 PyBytes::new(py, path.as_bytes()),
48 PyBytes::new(py, copy_path.as_bytes()),
49 49 )?;
50 50 }
51 51 Ok(
52 52 (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
53 53 .to_py_object(py),
54 54 )
55 55 }
56 56 Err(e) => Err(PyErr::new::<exc::ValueError, _>(
57 57 py,
58 58 match e {
59 59 DirstateParseError::TooLittleData => {
60 60 "too little data for parents".to_string()
61 61 }
62 62 DirstateParseError::Overflow => {
63 63 "overflow in dirstate".to_string()
64 64 }
65 65 DirstateParseError::CorruptedEntry(e) => e,
66 66 DirstateParseError::Damaged => {
67 67 "dirstate appears to be damaged".to_string()
68 68 }
69 69 },
70 70 )),
71 71 }
72 72 }
73 73
74 74 fn pack_dirstate_wrapper(
75 75 py: Python,
76 76 dmap: PyDict,
77 77 copymap: PyDict,
78 78 pl: PyTuple,
79 79 now: PyInt,
80 80 ) -> PyResult<PyBytes> {
81 81 let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
82 82 let p1: &[u8] = p1.data(py);
83 83 let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
84 84 let p2: &[u8] = p2.data(py);
85 85
86 86 let mut dirstate_map = extract_dirstate(py, &dmap)?;
87 87
88 88 let copies: Result<FastHashMap<HgPathBuf, HgPathBuf>, PyErr> = copymap
89 89 .items(py)
90 90 .iter()
91 91 .map(|(key, value)| {
92 92 Ok((
93 93 HgPathBuf::from_bytes(key.extract::<PyBytes>(py)?.data(py)),
94 94 HgPathBuf::from_bytes(value.extract::<PyBytes>(py)?.data(py)),
95 95 ))
96 96 })
97 97 .collect();
98 98
99 99 if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE {
100 100 return Err(PyErr::new::<exc::ValueError, _>(
101 101 py,
102 102 "expected a 20-byte hash".to_string(),
103 103 ));
104 104 }
105 105
106 106 match pack_dirstate(
107 107 &mut dirstate_map,
108 108 &copies?,
109 109 DirstateParents {
110 110 p1: p1.try_into().unwrap(),
111 111 p2: p2.try_into().unwrap(),
112 112 },
113 113 Duration::from_secs(now.as_object().extract::<u64>(py)?),
114 114 ) {
115 115 Ok(packed) => {
116 116 for (filename, entry) in &dirstate_map {
117 117 dmap.set_item(
118 118 py,
119 PyBytes::new(py, filename.as_ref()),
119 PyBytes::new(py, filename.as_bytes()),
120 120 make_dirstate_tuple(py, entry)?,
121 121 )?;
122 122 }
123 123 Ok(PyBytes::new(py, &packed))
124 124 }
125 125 Err(error) => Err(PyErr::new::<exc::ValueError, _>(
126 126 py,
127 127 match error {
128 128 DirstatePackError::CorruptedParent => {
129 129 "expected a 20-byte hash".to_string()
130 130 }
131 131 DirstatePackError::CorruptedEntry(e) => e,
132 132 DirstatePackError::BadSize(expected, actual) => {
133 133 format!("bad dirstate size: {} != {}", actual, expected)
134 134 }
135 135 },
136 136 )),
137 137 }
138 138 }
139 139
140 140 /// Create the module, with `__package__` given from parent
141 141 pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
142 142 let dotted_name = &format!("{}.parsers", package);
143 143 let m = PyModule::new(py, dotted_name)?;
144 144
145 145 m.add(py, "__package__", package)?;
146 146 m.add(py, "__doc__", "Parsers - Rust implementation")?;
147 147
148 148 m.add(
149 149 py,
150 150 "parse_dirstate",
151 151 py_fn!(
152 152 py,
153 153 parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
154 154 ),
155 155 )?;
156 156 m.add(
157 157 py,
158 158 "pack_dirstate",
159 159 py_fn!(
160 160 py,
161 161 pack_dirstate_wrapper(
162 162 dmap: PyDict,
163 163 copymap: PyDict,
164 164 pl: PyTuple,
165 165 now: PyInt
166 166 )
167 167 ),
168 168 )?;
169 169
170 170 let sys = PyModule::import(py, "sys")?;
171 171 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
172 172 sys_modules.set_item(py, dotted_name, &m)?;
173 173
174 174 Ok(m)
175 175 }
@@ -1,47 +1,44 b''
1 1 use cpython::exc::ValueError;
2 2 use cpython::{PyBytes, PyDict, PyErr, PyObject, PyResult, PyTuple, Python};
3 3 use hg::revlog::Node;
4 4 use std::convert::TryFrom;
5 5
6 6 #[allow(unused)]
7 7 pub fn print_python_trace(py: Python) -> PyResult<PyObject> {
8 8 eprintln!("===============================");
9 9 eprintln!("Printing Python stack from Rust");
10 10 eprintln!("===============================");
11 11 let traceback = py.import("traceback")?;
12 12 let sys = py.import("sys")?;
13 13 let kwargs = PyDict::new(py);
14 14 kwargs.set_item(py, "file", sys.get(py, "stderr")?)?;
15 15 traceback.call(py, "print_stack", PyTuple::new(py, &[]), Some(&kwargs))
16 16 }
17 17
18 18 // Necessary evil for the time being, could maybe be moved to
19 19 // a TryFrom in Node itself
20 20 const NODE_BYTES_LENGTH: usize = 20;
21 21 type NodeData = [u8; NODE_BYTES_LENGTH];
22 22
23 23 /// Copy incoming Python bytes given as `PyObject` into `Node`,
24 24 /// doing the necessary checks
25 25 pub fn node_from_py_object<'a>(
26 26 py: Python,
27 27 bytes: &'a PyObject,
28 28 ) -> PyResult<Node> {
29 29 let as_py_bytes: &'a PyBytes = bytes.extract(py)?;
30 30 node_from_py_bytes(py, as_py_bytes)
31 31 }
32 32
33 33 /// Clone incoming Python bytes given as `PyBytes` as a `Node`,
34 34 /// doing the necessary checks.
35 pub fn node_from_py_bytes<'a>(
36 py: Python,
37 bytes: &'a PyBytes,
38 ) -> PyResult<Node> {
35 pub fn node_from_py_bytes(py: Python, bytes: &PyBytes) -> PyResult<Node> {
39 36 <NodeData>::try_from(bytes.data(py))
40 37 .map_err(|_| {
41 38 PyErr::new::<ValueError, _>(
42 39 py,
43 40 format!("{}-byte hash required", NODE_BYTES_LENGTH),
44 41 )
45 42 })
46 .map(|n| n.into())
43 .map(Into::into)
47 44 }
General Comments 0
You need to be logged in to leave comments. Login now