##// END OF EJS Templates
rust: add Debug constraint to Matcher trait...
Raphaël Gomès -
r50381:e8481625 default
parent child Browse files
Show More
@@ -1,1673 +1,1688
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use crate::{
11 11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 12 filepatterns::{
13 13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 14 PatternFileWarning, PatternResult,
15 15 },
16 16 utils::{
17 17 files::find_dirs,
18 18 hg_path::{HgPath, HgPathBuf},
19 19 Escaped,
20 20 },
21 21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 22 PatternSyntax,
23 23 };
24 24
25 25 use crate::dirstate::status::IgnoreFnType;
26 26 use crate::filepatterns::normalize_path_bytes;
27 27 use std::borrow::ToOwned;
28 28 use std::collections::HashSet;
29 29 use std::fmt::{Display, Error, Formatter};
30 30 use std::iter::FromIterator;
31 31 use std::ops::Deref;
32 32 use std::path::{Path, PathBuf};
33 33
34 34 use micro_timer::timed;
35 35
/// Describes which children of a directory a traversal needs to look at, as
/// returned by `Matcher::visit_children_set`.
#[derive(Debug, PartialEq)]
pub enum VisitChildrenSet {
    /// Don't visit anything
    Empty,
    /// Only visit this directory
    This,
    /// Visit this directory and these subdirectories
    /// TODO Should we implement a `NonEmptyHashSet`?
    Set(HashSet<HgPathBuf>),
    /// Visit this directory and all subdirectories
    Recursive,
}
48 48
/// Common interface of everything that can decide whether a path is matched.
///
/// Implementors must also implement `Debug`.
pub trait Matcher: core::fmt::Debug {
    /// Explicitly listed files
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
    /// Returns whether `filename` is in `file_set`
    fn exact_match(&self, filename: &HgPath) -> bool;
    /// Returns whether `filename` is matched by this matcher
    fn matches(&self, filename: &HgPath) -> bool;
    /// Decides whether a directory should be visited based on whether it
    /// has potential matches in it or one of its subdirectories, and
    /// potentially lists which subdirectories of that directory should be
    /// visited. This is based on the match's primary, included, and excluded
    /// patterns.
    ///
    /// # Example
    ///
    /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
    /// return the following values (assuming the implementation of
    /// visit_children_set is capable of recognizing this; some implementations
    /// are not).
    ///
    /// ```text
    /// '' -> {'foo', 'qux'}
    /// 'baz' -> set()
    /// 'foo' -> {'bar'}
    /// // Ideally this would be `Recursive`, but since the prefix nature of
    /// // matchers is applied to the entire matcher, we have to downgrade this
    /// // to `This` due to the (yet to be implemented in Rust) non-prefix
    /// // `RootFilesIn`-kind matcher being mixed in.
    /// 'foo/bar' -> 'this'
    /// 'qux' -> 'this'
    /// ```
    /// # Important
    ///
    /// Most matchers do not know if they're representing files or
    /// directories. They see `['path:dir/f']` and don't know whether `f` is a
    /// file or a directory, so `visit_children_set('dir')` for most matchers
    /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
    /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
    /// it may return `VisitChildrenSet::This`.
    /// Do not rely on the return being a `HashSet` indicating that there are
    /// no files in this dir to investigate (or equivalently that if there are
    /// files to investigate in 'dir' that it will always return
    /// `VisitChildrenSet::This`).
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
    /// Matcher will match everything and `file_set()` will be empty:
    /// optimization might be possible.
    fn matches_everything(&self) -> bool;
    /// Matcher will match exactly the files in `file_set()`: optimization
    /// might be possible.
    fn is_exact(&self) -> bool;
}
101 101
102 102 /// Matches everything.
103 103 ///```
104 104 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
105 105 ///
106 106 /// let matcher = AlwaysMatcher;
107 107 ///
108 108 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
109 109 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
110 110 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
111 111 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
112 112 /// ```
113 113 #[derive(Debug)]
114 114 pub struct AlwaysMatcher;
115 115
116 116 impl Matcher for AlwaysMatcher {
117 117 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
118 118 None
119 119 }
120 120 fn exact_match(&self, _filename: &HgPath) -> bool {
121 121 false
122 122 }
123 123 fn matches(&self, _filename: &HgPath) -> bool {
124 124 true
125 125 }
126 126 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
127 127 VisitChildrenSet::Recursive
128 128 }
129 129 fn matches_everything(&self) -> bool {
130 130 true
131 131 }
132 132 fn is_exact(&self) -> bool {
133 133 false
134 134 }
135 135 }
136 136
137 137 /// Matches nothing.
138 138 #[derive(Debug)]
139 139 pub struct NeverMatcher;
140 140
141 141 impl Matcher for NeverMatcher {
142 142 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
143 143 None
144 144 }
145 145 fn exact_match(&self, _filename: &HgPath) -> bool {
146 146 false
147 147 }
148 148 fn matches(&self, _filename: &HgPath) -> bool {
149 149 false
150 150 }
151 151 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
152 152 VisitChildrenSet::Empty
153 153 }
154 154 fn matches_everything(&self) -> bool {
155 155 false
156 156 }
157 157 fn is_exact(&self) -> bool {
158 158 true
159 159 }
160 160 }
161 161
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
///```
/// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
///
/// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
/// let matcher = FileMatcher::new(files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher {
    /// The exact set of paths this matcher accepts.
    files: HashSet<HgPathBuf>,
    /// Directory multiset built from `files` by `DirsMultiset::from_manifest`;
    /// consulted by `visit_children_set` to know which directories lead to a
    /// listed file.
    dirs: DirsMultiset,
}
181 181
182 182 impl FileMatcher {
183 183 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, DirstateMapError> {
184 184 let dirs = DirsMultiset::from_manifest(&files)?;
185 185 Ok(Self {
186 186 files: HashSet::from_iter(files.into_iter()),
187 187 dirs,
188 188 })
189 189 }
190 190 fn inner_matches(&self, filename: &HgPath) -> bool {
191 191 self.files.contains(filename.as_ref())
192 192 }
193 193 }
194 194
195 195 impl Matcher for FileMatcher {
196 196 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
197 197 Some(&self.files)
198 198 }
199 199 fn exact_match(&self, filename: &HgPath) -> bool {
200 200 self.inner_matches(filename)
201 201 }
202 202 fn matches(&self, filename: &HgPath) -> bool {
203 203 self.inner_matches(filename)
204 204 }
205 205 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
206 206 if self.files.is_empty() || !self.dirs.contains(&directory) {
207 207 return VisitChildrenSet::Empty;
208 208 }
209 209 let mut candidates: HashSet<HgPathBuf> =
210 210 self.dirs.iter().cloned().collect();
211 211
212 212 candidates.extend(self.files.iter().cloned());
213 213 candidates.remove(HgPath::new(b""));
214 214
215 215 if !directory.as_ref().is_empty() {
216 216 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
217 217 candidates = candidates
218 218 .iter()
219 219 .filter_map(|c| {
220 220 if c.as_bytes().starts_with(&directory) {
221 221 Some(HgPathBuf::from_bytes(
222 222 &c.as_bytes()[directory.len()..],
223 223 ))
224 224 } else {
225 225 None
226 226 }
227 227 })
228 228 .collect();
229 229 }
230 230
231 231 // `self.dirs` includes all of the directories, recursively, so if
232 232 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
233 233 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
234 234 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
235 235 // subdir will be in there without a slash.
236 236 VisitChildrenSet::Set(
237 237 candidates
238 238 .into_iter()
239 239 .filter_map(|c| {
240 240 if c.bytes().all(|b| *b != b'/') {
241 241 Some(c)
242 242 } else {
243 243 None
244 244 }
245 245 })
246 246 .collect(),
247 247 )
248 248 }
249 249 fn matches_everything(&self) -> bool {
250 250 false
251 251 }
252 252 fn is_exact(&self) -> bool {
253 253 true
254 254 }
255 255 }
256 256
/// Matches files that are included in the ignore rules.
/// ```
/// use hg::{
///     matchers::{IncludeMatcher, Matcher},
///     IgnorePattern,
///     PatternSyntax,
///     utils::hg_path::HgPath
/// };
/// use std::path::Path;
///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ```
pub struct IncludeMatcher<'a> {
    /// Byte representation of the patterns, kept for debugging and printing
    patterns: Vec<u8>,
    /// The function that actually decides whether a path matches
    match_fn: IgnoreFnType<'a>,
    /// Whether all the patterns match a prefix (i.e. recursively)
    prefix: bool,
    /// Directories to match recursively
    roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    parents: HashSet<HgPathBuf>,
}
285 285
286 impl core::fmt::Debug for IncludeMatcher<'_> {
287 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
288 f.debug_struct("IncludeMatcher")
289 .field("patterns", &String::from_utf8_lossy(&self.patterns))
290 .field("prefix", &self.prefix)
291 .field("roots", &self.roots)
292 .field("dirs", &self.dirs)
293 .field("parents", &self.parents)
294 .finish()
295 }
296 }
297
286 298 impl<'a> Matcher for IncludeMatcher<'a> {
287 299 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
288 300 None
289 301 }
290 302
291 303 fn exact_match(&self, _filename: &HgPath) -> bool {
292 304 false
293 305 }
294 306
295 307 fn matches(&self, filename: &HgPath) -> bool {
296 308 (self.match_fn)(filename.as_ref())
297 309 }
298 310
299 311 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
300 312 let dir = directory.as_ref();
301 313 if self.prefix && self.roots.contains(dir) {
302 314 return VisitChildrenSet::Recursive;
303 315 }
304 316 if self.roots.contains(HgPath::new(b""))
305 317 || self.roots.contains(dir)
306 318 || self.dirs.contains(dir)
307 319 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
308 320 {
309 321 return VisitChildrenSet::This;
310 322 }
311 323
312 324 if self.parents.contains(directory.as_ref()) {
313 325 let multiset = self.get_all_parents_children();
314 326 if let Some(children) = multiset.get(dir) {
315 327 return VisitChildrenSet::Set(
316 328 children.into_iter().map(HgPathBuf::from).collect(),
317 329 );
318 330 }
319 331 }
320 332 VisitChildrenSet::Empty
321 333 }
322 334
323 335 fn matches_everything(&self) -> bool {
324 336 false
325 337 }
326 338
327 339 fn is_exact(&self) -> bool {
328 340 false
329 341 }
330 342 }
331 343
/// The union of multiple matchers. Will match if any of the matchers match.
#[derive(Debug)]
pub struct UnionMatcher {
    /// The matchers being combined; a path matches if any of them matches it.
    matchers: Vec<Box<dyn Matcher + Sync>>,
}
336 349
337 350 impl Matcher for UnionMatcher {
338 351 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
339 352 None
340 353 }
341 354
342 355 fn exact_match(&self, _filename: &HgPath) -> bool {
343 356 false
344 357 }
345 358
346 359 fn matches(&self, filename: &HgPath) -> bool {
347 360 self.matchers.iter().any(|m| m.matches(filename))
348 361 }
349 362
350 363 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
351 364 let mut result = HashSet::new();
352 365 let mut this = false;
353 366 for matcher in self.matchers.iter() {
354 367 let visit = matcher.visit_children_set(directory);
355 368 match visit {
356 369 VisitChildrenSet::Empty => continue,
357 370 VisitChildrenSet::This => {
358 371 this = true;
359 372 // Don't break, we might have an 'all' in here.
360 373 continue;
361 374 }
362 375 VisitChildrenSet::Set(set) => {
363 376 result.extend(set);
364 377 }
365 378 VisitChildrenSet::Recursive => {
366 379 return visit;
367 380 }
368 381 }
369 382 }
370 383 if this {
371 384 return VisitChildrenSet::This;
372 385 }
373 386 if result.is_empty() {
374 387 VisitChildrenSet::Empty
375 388 } else {
376 389 VisitChildrenSet::Set(result)
377 390 }
378 391 }
379 392
380 393 fn matches_everything(&self) -> bool {
381 394 // TODO Maybe if all are AlwaysMatcher?
382 395 false
383 396 }
384 397
385 398 fn is_exact(&self) -> bool {
386 399 false
387 400 }
388 401 }
389 402
390 403 impl UnionMatcher {
391 404 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
392 405 Self { matchers }
393 406 }
394 407 }
395 408
/// The intersection of two matchers. Will match only if both matchers match.
#[derive(Debug)]
pub struct IntersectionMatcher {
    /// First operand; `new` stores the exact operand here when only one of
    /// the two is exact.
    m1: Box<dyn Matcher + Sync>,
    /// Second operand.
    m2: Box<dyn Matcher + Sync>,
    /// Explicit file list, computed by `new` when at least one operand is
    /// exact and exposes a file set; `None` otherwise.
    files: Option<HashSet<HgPathBuf>>,
}
401 415
402 416 impl Matcher for IntersectionMatcher {
403 417 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
404 418 self.files.as_ref()
405 419 }
406 420
407 421 fn exact_match(&self, filename: &HgPath) -> bool {
408 422 self.files.as_ref().map_or(false, |f| f.contains(filename))
409 423 }
410 424
411 425 fn matches(&self, filename: &HgPath) -> bool {
412 426 self.m1.matches(filename) && self.m2.matches(filename)
413 427 }
414 428
415 429 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
416 430 let m1_set = self.m1.visit_children_set(directory);
417 431 if m1_set == VisitChildrenSet::Empty {
418 432 return VisitChildrenSet::Empty;
419 433 }
420 434 let m2_set = self.m2.visit_children_set(directory);
421 435 if m2_set == VisitChildrenSet::Empty {
422 436 return VisitChildrenSet::Empty;
423 437 }
424 438
425 439 if m1_set == VisitChildrenSet::Recursive {
426 440 return m2_set;
427 441 } else if m2_set == VisitChildrenSet::Recursive {
428 442 return m1_set;
429 443 }
430 444
431 445 match (&m1_set, &m2_set) {
432 446 (VisitChildrenSet::Recursive, _) => m2_set,
433 447 (_, VisitChildrenSet::Recursive) => m1_set,
434 448 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
435 449 VisitChildrenSet::This
436 450 }
437 451 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
438 452 let set: HashSet<_> = m1.intersection(&m2).cloned().collect();
439 453 if set.is_empty() {
440 454 VisitChildrenSet::Empty
441 455 } else {
442 456 VisitChildrenSet::Set(set)
443 457 }
444 458 }
445 459 _ => unreachable!(),
446 460 }
447 461 }
448 462
449 463 fn matches_everything(&self) -> bool {
450 464 self.m1.matches_everything() && self.m2.matches_everything()
451 465 }
452 466
453 467 fn is_exact(&self) -> bool {
454 468 self.m1.is_exact() || self.m2.is_exact()
455 469 }
456 470 }
457 471
458 472 impl IntersectionMatcher {
459 473 pub fn new(
460 474 mut m1: Box<dyn Matcher + Sync>,
461 475 mut m2: Box<dyn Matcher + Sync>,
462 476 ) -> Self {
463 477 let files = if m1.is_exact() || m2.is_exact() {
464 478 if !m1.is_exact() {
465 479 std::mem::swap(&mut m1, &mut m2);
466 480 }
467 481 m1.file_set().map(|m1_files| {
468 482 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
469 483 })
470 484 } else {
471 485 None
472 486 };
473 487 Self { m1, m2, files }
474 488 }
475 489 }
476 490
/// The difference of two matchers. Will match what `base` matches, unless
/// `excluded` matches it too.
#[derive(Debug)]
pub struct DifferenceMatcher {
    /// Matcher whose matches are kept.
    base: Box<dyn Matcher + Sync>,
    /// Matcher whose matches are removed from those of `base`.
    excluded: Box<dyn Matcher + Sync>,
    /// Explicit file list, computed by `new` when `base` is exact and
    /// exposes a file set; `None` otherwise.
    files: Option<HashSet<HgPathBuf>>,
}
482 497
483 498 impl Matcher for DifferenceMatcher {
484 499 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
485 500 self.files.as_ref()
486 501 }
487 502
488 503 fn exact_match(&self, filename: &HgPath) -> bool {
489 504 self.files.as_ref().map_or(false, |f| f.contains(filename))
490 505 }
491 506
492 507 fn matches(&self, filename: &HgPath) -> bool {
493 508 self.base.matches(filename) && !self.excluded.matches(filename)
494 509 }
495 510
496 511 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
497 512 let excluded_set = self.excluded.visit_children_set(directory);
498 513 if excluded_set == VisitChildrenSet::Recursive {
499 514 return VisitChildrenSet::Empty;
500 515 }
501 516 let base_set = self.base.visit_children_set(directory);
502 517 // Possible values for base: 'recursive', 'this', set(...), set()
503 518 // Possible values for excluded: 'this', set(...), set()
504 519 // If excluded has nothing under here that we care about, return base,
505 520 // even if it's 'recursive'.
506 521 if excluded_set == VisitChildrenSet::Empty {
507 522 return base_set;
508 523 }
509 524 match base_set {
510 525 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
511 526 // Never return 'recursive' here if excluded_set is any kind of
512 527 // non-empty (either 'this' or set(foo)), since excluded might
513 528 // return set() for a subdirectory.
514 529 VisitChildrenSet::This
515 530 }
516 531 set => {
517 532 // Possible values for base: set(...), set()
518 533 // Possible values for excluded: 'this', set(...)
519 534 // We ignore excluded set results. They're possibly incorrect:
520 535 // base = path:dir/subdir
521 536 // excluded=rootfilesin:dir,
522 537 // visit_children_set(''):
523 538 // base returns {'dir'}, excluded returns {'dir'}, if we
524 539 // subtracted we'd return set(), which is *not* correct, we
525 540 // still need to visit 'dir'!
526 541 set
527 542 }
528 543 }
529 544 }
530 545
531 546 fn matches_everything(&self) -> bool {
532 547 false
533 548 }
534 549
535 550 fn is_exact(&self) -> bool {
536 551 self.base.is_exact()
537 552 }
538 553 }
539 554
540 555 impl DifferenceMatcher {
541 556 pub fn new(
542 557 base: Box<dyn Matcher + Sync>,
543 558 excluded: Box<dyn Matcher + Sync>,
544 559 ) -> Self {
545 560 let base_is_exact = base.is_exact();
546 561 let base_files = base.file_set().map(ToOwned::to_owned);
547 562 let mut new = Self {
548 563 base,
549 564 excluded,
550 565 files: None,
551 566 };
552 567 if base_is_exact {
553 568 new.files = base_files.map(|files| {
554 569 files.iter().cloned().filter(|f| new.matches(f)).collect()
555 570 });
556 571 }
557 572 new
558 573 }
559 574 }
560 575
561 576 /// Returns a function that matches an `HgPath` against the given regex
562 577 /// pattern.
563 578 ///
564 579 /// This can fail when the pattern is invalid or not supported by the
565 580 /// underlying engine (the `regex` crate), for instance anything with
566 581 /// back-references.
567 582 #[timed]
568 583 fn re_matcher(
569 584 pattern: &[u8],
570 585 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
571 586 use std::io::Write;
572 587
573 588 // The `regex` crate adds `.*` to the start and end of expressions if there
574 589 // are no anchors, so add the start anchor.
575 590 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
576 591 for byte in pattern {
577 592 if *byte > 127 {
578 593 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
579 594 } else {
580 595 escaped_bytes.push(*byte);
581 596 }
582 597 }
583 598 escaped_bytes.push(b')');
584 599
585 600 // Avoid the cost of UTF8 checking
586 601 //
587 602 // # Safety
588 603 // This is safe because we escaped all non-ASCII bytes.
589 604 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
590 605 let re = regex::bytes::RegexBuilder::new(&pattern_string)
591 606 .unicode(false)
592 607 // Big repos with big `.hgignore` will hit the default limit and
593 608 // incur a significant performance hit. One repo's `hg status` hit
594 609 // multiple *minutes*.
595 610 .dfa_size_limit(50 * (1 << 20))
596 611 .build()
597 612 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
598 613
599 614 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
600 615 }
601 616
602 617 /// Returns the regex pattern and a function that matches an `HgPath` against
603 618 /// said regex formed by the given ignore patterns.
604 619 fn build_regex_match<'a, 'b>(
605 620 ignore_patterns: &'a [IgnorePattern],
606 621 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
607 622 let mut regexps = vec![];
608 623 let mut exact_set = HashSet::new();
609 624
610 625 for pattern in ignore_patterns {
611 626 if let Some(re) = build_single_regex(pattern)? {
612 627 regexps.push(re);
613 628 } else {
614 629 let exact = normalize_path_bytes(&pattern.pattern);
615 630 exact_set.insert(HgPathBuf::from_bytes(&exact));
616 631 }
617 632 }
618 633
619 634 let full_regex = regexps.join(&b'|');
620 635
621 636 // An empty pattern would cause the regex engine to incorrectly match the
622 637 // (empty) root directory
623 638 let func = if !(regexps.is_empty()) {
624 639 let matcher = re_matcher(&full_regex)?;
625 640 let func = move |filename: &HgPath| {
626 641 exact_set.contains(filename) || matcher(filename)
627 642 };
628 643 Box::new(func) as IgnoreFnType
629 644 } else {
630 645 let func = move |filename: &HgPath| exact_set.contains(filename);
631 646 Box::new(func) as IgnoreFnType
632 647 };
633 648
634 649 Ok((full_regex, func))
635 650 }
636 651
637 652 /// Returns roots and directories corresponding to each pattern.
638 653 ///
639 654 /// This calculates the roots and directories exactly matching the patterns and
640 655 /// returns a tuple of (roots, dirs). It does not return other directories
641 656 /// which may also need to be considered, like the parent directories.
642 657 fn roots_and_dirs(
643 658 ignore_patterns: &[IgnorePattern],
644 659 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
645 660 let mut roots = Vec::new();
646 661 let mut dirs = Vec::new();
647 662
648 663 for ignore_pattern in ignore_patterns {
649 664 let IgnorePattern {
650 665 syntax, pattern, ..
651 666 } = ignore_pattern;
652 667 match syntax {
653 668 PatternSyntax::RootGlob | PatternSyntax::Glob => {
654 669 let mut root = HgPathBuf::new();
655 670 for p in pattern.split(|c| *c == b'/') {
656 671 if p.iter().any(|c| match *c {
657 672 b'[' | b'{' | b'*' | b'?' => true,
658 673 _ => false,
659 674 }) {
660 675 break;
661 676 }
662 677 root.push(HgPathBuf::from_bytes(p).as_ref());
663 678 }
664 679 roots.push(root);
665 680 }
666 681 PatternSyntax::Path | PatternSyntax::RelPath => {
667 682 let pat = HgPath::new(if pattern == b"." {
668 683 &[] as &[u8]
669 684 } else {
670 685 pattern
671 686 });
672 687 roots.push(pat.to_owned());
673 688 }
674 689 PatternSyntax::RootFiles => {
675 690 let pat = if pattern == b"." {
676 691 &[] as &[u8]
677 692 } else {
678 693 pattern
679 694 };
680 695 dirs.push(HgPathBuf::from_bytes(pat));
681 696 }
682 697 _ => {
683 698 roots.push(HgPathBuf::new());
684 699 }
685 700 }
686 701 }
687 702 (roots, dirs)
688 703 }
689 704
/// Paths extracted from patterns, as computed by `roots_dirs_and_parents`.
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories to match recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    pub parents: HashSet<HgPathBuf>,
}
700 715
701 716 /// Extract roots, dirs and parents from patterns.
702 717 fn roots_dirs_and_parents(
703 718 ignore_patterns: &[IgnorePattern],
704 719 ) -> PatternResult<RootsDirsAndParents> {
705 720 let (roots, dirs) = roots_and_dirs(ignore_patterns);
706 721
707 722 let mut parents = HashSet::new();
708 723
709 724 parents.extend(
710 725 DirsMultiset::from_manifest(&dirs)
711 726 .map_err(|e| match e {
712 727 DirstateMapError::InvalidPath(e) => e,
713 728 _ => unreachable!(),
714 729 })?
715 730 .iter()
716 731 .map(ToOwned::to_owned),
717 732 );
718 733 parents.extend(
719 734 DirsMultiset::from_manifest(&roots)
720 735 .map_err(|e| match e {
721 736 DirstateMapError::InvalidPath(e) => e,
722 737 _ => unreachable!(),
723 738 })?
724 739 .iter()
725 740 .map(ToOwned::to_owned),
726 741 );
727 742
728 743 Ok(RootsDirsAndParents {
729 744 roots: HashSet::from_iter(roots),
730 745 dirs: HashSet::from_iter(dirs),
731 746 parents,
732 747 })
733 748 }
734 749
735 750 /// Returns a function that checks whether a given file (in the general sense)
736 751 /// should be matched.
737 752 fn build_match<'a, 'b>(
738 753 ignore_patterns: Vec<IgnorePattern>,
739 754 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
740 755 let mut match_funcs: Vec<IgnoreFnType<'b>> = vec![];
741 756 // For debugging and printing
742 757 let mut patterns = vec![];
743 758
744 759 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
745 760
746 761 if !subincludes.is_empty() {
747 762 // Build prefix-based matcher functions for subincludes
748 763 let mut submatchers = FastHashMap::default();
749 764 let mut prefixes = vec![];
750 765
751 766 for sub_include in subincludes {
752 767 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
753 768 let match_fn =
754 769 Box::new(move |path: &HgPath| matcher.matches(path));
755 770 prefixes.push(sub_include.prefix.clone());
756 771 submatchers.insert(sub_include.prefix.clone(), match_fn);
757 772 }
758 773
759 774 let match_subinclude = move |filename: &HgPath| {
760 775 for prefix in prefixes.iter() {
761 776 if let Some(rel) = filename.relative_to(prefix) {
762 777 if (submatchers[prefix])(rel) {
763 778 return true;
764 779 }
765 780 }
766 781 }
767 782 false
768 783 };
769 784
770 785 match_funcs.push(Box::new(match_subinclude));
771 786 }
772 787
773 788 if !ignore_patterns.is_empty() {
774 789 // Either do dumb matching if all patterns are rootfiles, or match
775 790 // with a regex.
776 791 if ignore_patterns
777 792 .iter()
778 793 .all(|k| k.syntax == PatternSyntax::RootFiles)
779 794 {
780 795 let dirs: HashSet<_> = ignore_patterns
781 796 .iter()
782 797 .map(|k| k.pattern.to_owned())
783 798 .collect();
784 799 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
785 800
786 801 let match_func = move |path: &HgPath| -> bool {
787 802 let path = path.as_bytes();
788 803 let i = path.iter().rfind(|a| **a == b'/');
789 804 let dir = if let Some(i) = i {
790 805 &path[..*i as usize]
791 806 } else {
792 807 b"."
793 808 };
794 809 dirs.contains(dir.deref())
795 810 };
796 811 match_funcs.push(Box::new(match_func));
797 812
798 813 patterns.extend(b"rootfilesin: ");
799 814 dirs_vec.sort();
800 815 patterns.extend(dirs_vec.escaped_bytes());
801 816 } else {
802 817 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
803 818 patterns = new_re;
804 819 match_funcs.push(match_func)
805 820 }
806 821 }
807 822
808 823 Ok(if match_funcs.len() == 1 {
809 824 (patterns, match_funcs.remove(0))
810 825 } else {
811 826 (
812 827 patterns,
813 828 Box::new(move |f: &HgPath| -> bool {
814 829 match_funcs.iter().any(|match_func| match_func(f))
815 830 }),
816 831 )
817 832 })
818 833 }
819 834
820 835 /// Parses all "ignore" files with their recursive includes and returns a
821 836 /// function that checks whether a given file (in the general sense) should be
822 837 /// ignored.
823 838 pub fn get_ignore_matcher<'a>(
824 839 mut all_pattern_files: Vec<PathBuf>,
825 840 root_dir: &Path,
826 841 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
827 842 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
828 843 let mut all_patterns = vec![];
829 844 let mut all_warnings = vec![];
830 845
831 846 // Sort to make the ordering of calls to `inspect_pattern_bytes`
832 847 // deterministic even if the ordering of `all_pattern_files` is not (such
833 848 // as when a iteration order of a Python dict or Rust HashMap is involved).
834 849 // Sort by "string" representation instead of the default by component
835 850 // (with a Rust-specific definition of a component)
836 851 all_pattern_files
837 852 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
838 853
839 854 for pattern_file in &all_pattern_files {
840 855 let (patterns, warnings) = get_patterns_from_file(
841 856 pattern_file,
842 857 root_dir,
843 858 inspect_pattern_bytes,
844 859 )?;
845 860
846 861 all_patterns.extend(patterns.to_owned());
847 862 all_warnings.extend(warnings);
848 863 }
849 864 let matcher = IncludeMatcher::new(all_patterns)?;
850 865 Ok((matcher, all_warnings))
851 866 }
852 867
853 868 /// Parses all "ignore" files with their recursive includes and returns a
854 869 /// function that checks whether a given file (in the general sense) should be
855 870 /// ignored.
856 871 pub fn get_ignore_function<'a>(
857 872 all_pattern_files: Vec<PathBuf>,
858 873 root_dir: &Path,
859 874 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
860 875 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
861 876 let res =
862 877 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
863 878 res.map(|(matcher, all_warnings)| {
864 879 let res: IgnoreFnType<'a> =
865 880 Box::new(move |path: &HgPath| matcher.matches(path));
866 881
867 882 (res, all_warnings)
868 883 })
869 884 }
870 885
871 886 impl<'a> IncludeMatcher<'a> {
872 887 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
873 888 let RootsDirsAndParents {
874 889 roots,
875 890 dirs,
876 891 parents,
877 892 } = roots_dirs_and_parents(&ignore_patterns)?;
878 893 let prefix = ignore_patterns.iter().all(|k| match k.syntax {
879 894 PatternSyntax::Path | PatternSyntax::RelPath => true,
880 895 _ => false,
881 896 });
882 897 let (patterns, match_fn) = build_match(ignore_patterns)?;
883 898
884 899 Ok(Self {
885 900 patterns,
886 901 match_fn,
887 902 prefix,
888 903 roots,
889 904 dirs,
890 905 parents,
891 906 })
892 907 }
893 908
894 909 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
895 910 // TODO cache
896 911 let thing = self
897 912 .dirs
898 913 .iter()
899 914 .chain(self.roots.iter())
900 915 .chain(self.parents.iter());
901 916 DirsChildrenMultiset::new(thing, Some(&self.parents))
902 917 }
903 918
904 919 pub fn debug_get_patterns(&self) -> &[u8] {
905 920 self.patterns.as_ref()
906 921 }
907 922 }
908 923
909 924 impl<'a> Display for IncludeMatcher<'a> {
910 925 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
911 926 // XXX What about exact matches?
912 927 // I'm not sure it's worth it to clone the HashSet and keep it
913 928 // around just in case someone wants to display the matcher, plus
914 929 // it's going to be unreadable after a few entries, but we need to
915 930 // inform in this display that exact matches are being used and are
916 931 // (on purpose) missing from the `includes`.
917 932 write!(
918 933 f,
919 934 "IncludeMatcher(includes='{}')",
920 935 String::from_utf8_lossy(&self.patterns.escaped_bytes())
921 936 )
922 937 }
923 938 }
924 939
925 940 #[cfg(test)]
926 941 mod tests {
927 942 use super::*;
928 943 use pretty_assertions::assert_eq;
929 944 use std::path::Path;
930 945
#[test]
fn test_roots_and_dirs() {
    // Three glob patterns sharing the empty source path.
    let globs: [&[u8]; 3] = [b"g/h/*", b"g/h", b"g*"];
    let pats: Vec<IgnorePattern> = globs
        .iter()
        .map(|glob| {
            IgnorePattern::new(PatternSyntax::Glob, glob, Path::new(""))
        })
        .collect();

    let (roots, dirs) = roots_and_dirs(&pats);

    // One root per pattern, in pattern order; duplicates are kept.
    let expected_roots = vec![
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::new(),
    ];
    assert_eq!(roots, expected_roots);
    assert_eq!(dirs, Vec::new());
}
950 965
#[test]
fn test_roots_dirs_and_parents() {
    // Same three glob patterns as in `test_roots_and_dirs`.
    let globs: [&[u8]; 3] = [b"g/h/*", b"g/h", b"g*"];
    let pats: Vec<IgnorePattern> = globs
        .iter()
        .map(|glob| {
            IgnorePattern::new(PatternSyntax::Glob, glob, Path::new(""))
        })
        .collect();

    let roots: HashSet<HgPathBuf> =
        vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
            .into_iter()
            .collect();
    let parents: HashSet<HgPathBuf> =
        vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
            .into_iter()
            .collect();
    let expected = RootsDirsAndParents {
        roots,
        dirs: HashSet::new(),
        parents,
    };

    assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
}
978 993
#[test]
fn test_filematcher_visit_children_set() {
    /// Wraps the given child names into a `VisitChildrenSet::Set`.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Visitchildrenset
    let matcher =
        FileMatcher::new(vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")])
            .unwrap();

    // (directory, expected answer), checked in this order.
    let cases: &[(&[u8], VisitChildrenSet)] = &[
        (b"", children(&[b"dir"])),
        (b"dir", children(&[b"subdir"])),
        (b"dir/subdir", children(&[b"foo.txt"])),
        (b"dir/subdir/x", VisitChildrenSet::Empty),
        (b"dir/subdir/foo.txt", VisitChildrenSet::Empty),
        (b"folder", VisitChildrenSet::Empty),
    ];
    for (dir, expected) in cases {
        assert_eq!(matcher.visit_children_set(HgPath::new(dir)), *expected);
    }
}
1019 1034
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
    /// Wraps the given child names into a `VisitChildrenSet::Set`.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    let matcher = FileMatcher::new(vec![
        HgPathBuf::from_bytes(b"rootfile.txt"),
        HgPathBuf::from_bytes(b"a/file1.txt"),
        HgPathBuf::from_bytes(b"a/b/file2.txt"),
        // No file in a/b/c
        HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
    ])
    .unwrap();

    // (directory, expected answer), checked in this order.
    let cases: &[(&[u8], VisitChildrenSet)] = &[
        (b"", children(&[b"a", b"rootfile.txt"])),
        (b"a", children(&[b"b", b"file1.txt"])),
        (b"a/b", children(&[b"c", b"file2.txt"])),
        (b"a/b/c", children(&[b"d"])),
        (b"a/b/c/d", children(&[b"file4.txt"])),
        (b"a/b/c/d/e", VisitChildrenSet::Empty),
        (b"folder", VisitChildrenSet::Empty),
    ];
    for (dir, expected) in cases {
        assert_eq!(matcher.visit_children_set(HgPath::new(dir)), *expected);
    }
}
1077 1092
#[test]
fn test_includematcher() {
    /// Wraps the given child names into a `VisitChildrenSet::Set`.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Single-pattern `IncludeMatcher` with an empty source path.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        IncludeMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    }

    /// Asserts every `(directory, expected answer)` pair, in order.
    fn check<M: Matcher>(matcher: &M, cases: &[(&[u8], VisitChildrenSet)]) {
        for (dir, expected) in cases {
            assert_eq!(
                matcher.visit_children_set(HgPath::new(dir)),
                *expected
            );
        }
    }

    // VisitchildrensetPrefix
    check(
        &include(PatternSyntax::RelPath, b"dir/subdir"),
        &[
            (b"", children(&[b"dir"])),
            (b"dir", children(&[b"subdir"])),
            (b"dir/subdir", VisitChildrenSet::Recursive),
            // OPT: This should probably be 'all' if its parent is?
            (b"dir/subdir/x", VisitChildrenSet::This),
            (b"folder", VisitChildrenSet::Empty),
        ],
    );

    // VisitchildrensetRootfilesin
    check(
        &include(PatternSyntax::RootFiles, b"dir/subdir"),
        &[
            (b"", children(&[b"dir"])),
            (b"dir", children(&[b"subdir"])),
            (b"dir/subdir", VisitChildrenSet::This),
            (b"dir/subdir/x", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Empty),
        ],
    );

    // VisitchildrensetGlob
    check(
        &include(PatternSyntax::Glob, b"dir/z*"),
        &[
            (b"", children(&[b"dir"])),
            (b"folder", VisitChildrenSet::Empty),
            (b"dir", VisitChildrenSet::This),
            // OPT: these should probably be set().
            (b"dir/subdir", VisitChildrenSet::This),
            (b"dir/subdir/x", VisitChildrenSet::This),
        ],
    );

    // Test multiple patterns
    let multiple = IncludeMatcher::new(vec![
        IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
        IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
    ])
    .unwrap();
    check(&multiple, &[(b"", VisitChildrenSet::This)]);

    // Test a single recursive glob (`**`) pattern
    check(
        &include(PatternSyntax::Glob, b"**/*.exe"),
        &[(b"", VisitChildrenSet::This)],
    );
}
1207 1222
#[test]
fn test_unionmatcher() {
    /// Wraps the given child names into a `VisitChildrenSet::Set`.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Single-pattern `IncludeMatcher` with an empty source path.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        IncludeMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    }

    /// Asserts every `(directory, expected answer)` pair, in order.
    fn check<M: Matcher>(matcher: &M, cases: &[(&[u8], VisitChildrenSet)]) {
        for (dir, expected) in cases {
            assert_eq!(
                matcher.visit_children_set(HgPath::new(dir)),
                *expected
            );
        }
    }

    // Path + Rootfiles
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RootFiles, b"dir");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    check(
        &matcher,
        &[
            (b"", children(&[b"dir"])),
            (b"dir", VisitChildrenSet::This),
            (b"dir/subdir", VisitChildrenSet::Recursive),
            (b"dir/foo", VisitChildrenSet::Empty),
            // The `folder` case used to be asserted twice in a row; once
            // is enough.
            (b"folder", VisitChildrenSet::Empty),
            // OPT: These next two could be 'all' instead of 'this'.
            (b"dir/subdir/z", VisitChildrenSet::This),
            (b"dir/subdir/x", VisitChildrenSet::This),
        ],
    );

    // Path + unrelated Path
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RelPath, b"folder");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    check(
        &matcher,
        &[
            (b"", children(&[b"folder", b"dir"])),
            (b"dir", children(&[b"subdir"])),
            (b"dir/subdir", VisitChildrenSet::Recursive),
            (b"dir/foo", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Recursive),
            // OPT: These next two could be 'all' instead of 'this'.
            (b"dir/subdir/z", VisitChildrenSet::This),
            (b"dir/subdir/x", VisitChildrenSet::This),
        ],
    );

    // Path + subpath
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include(PatternSyntax::RelPath, b"dir/subdir");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    check(
        &matcher,
        &[
            (b"", children(&[b"dir"])),
            (b"dir", children(&[b"subdir"])),
            (b"dir/subdir", VisitChildrenSet::Recursive),
            (b"dir/foo", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Empty),
            (b"dir/subdir/x", VisitChildrenSet::Recursive),
            // OPT: this should probably be 'all' not 'this'.
            (b"dir/subdir/z", VisitChildrenSet::This),
        ],
    );
}
1365 1380
#[test]
fn test_intersectionmatcher() {
    /// Wraps the given child names into a `VisitChildrenSet::Set`.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Boxed single-pattern `IncludeMatcher` with an empty source path.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> Box<IncludeMatcher<'static>> {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new(""),
            )])
            .unwrap(),
        )
    }

    /// Asserts every `(directory, expected answer)` pair, in order.
    fn check<M: Matcher>(matcher: &M, cases: &[(&[u8], VisitChildrenSet)]) {
        for (dir, expected) in cases {
            assert_eq!(
                matcher.visit_children_set(HgPath::new(dir)),
                *expected
            );
        }
    }

    // Include path + Include rootfiles
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RootFiles, b"dir");
    let matcher = IntersectionMatcher::new(m1, m2);
    check(
        &matcher,
        &[
            (b"", children(&[b"dir"])),
            (b"dir", VisitChildrenSet::This),
            (b"dir/subdir", VisitChildrenSet::Empty),
            (b"dir/foo", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Empty),
            (b"dir/subdir/z", VisitChildrenSet::Empty),
            (b"dir/subdir/x", VisitChildrenSet::Empty),
        ],
    );

    // Non intersecting paths
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RelPath, b"folder");
    let matcher = IntersectionMatcher::new(m1, m2);
    check(
        &matcher,
        &[
            (b"", VisitChildrenSet::Empty),
            (b"dir", VisitChildrenSet::Empty),
            (b"dir/subdir", VisitChildrenSet::Empty),
            (b"dir/foo", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Empty),
            (b"dir/subdir/z", VisitChildrenSet::Empty),
            (b"dir/subdir/x", VisitChildrenSet::Empty),
        ],
    );

    // Nested paths
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include(PatternSyntax::RelPath, b"dir/subdir");
    let matcher = IntersectionMatcher::new(m1, m2);
    check(
        &matcher,
        &[
            (b"", children(&[b"dir"])),
            (b"dir", children(&[b"subdir"])),
            (b"dir/subdir", children(&[b"x"])),
            (b"dir/foo", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Empty),
            (b"dir/subdir/z", VisitChildrenSet::Empty),
            // OPT: this should probably be 'all' not 'this'.
            (b"dir/subdir/x", VisitChildrenSet::This),
        ],
    );

    // Diverging paths
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include(PatternSyntax::RelPath, b"dir/subdir/z");
    let matcher = IntersectionMatcher::new(m1, m2);
    check(
        &matcher,
        &[
            // OPT: these next two could probably be Empty as well.
            (b"", children(&[b"dir"])),
            (b"dir", children(&[b"subdir"])),
            (b"dir/subdir", VisitChildrenSet::Empty),
            (b"dir/foo", VisitChildrenSet::Empty),
            (b"folder", VisitChildrenSet::Empty),
            (b"dir/subdir/z", VisitChildrenSet::Empty),
            (b"dir/subdir/x", VisitChildrenSet::Empty),
        ],
    );
}
1576 1591
#[test]
fn test_differencematcher() {
    /// Wraps the given child names into a `VisitChildrenSet::Set`.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Boxed single-pattern `IncludeMatcher` whose source is `/repo`.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> Box<IncludeMatcher<'static>> {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new("/repo"),
            )])
            .unwrap(),
        )
    }

    // Directories probed by the two always/never scenarios below.
    let probes: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/subdir/z",
        b"dir/foo",
        b"dir/subdir/x",
        b"folder",
    ];

    // Two alwaysmatchers should function like a nevermatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(AlwaysMatcher),
    );
    for dir in probes.iter() {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(dir)),
            VisitChildrenSet::Empty
        );
    }

    // One always and one never should behave the same as an always
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(NeverMatcher),
    );
    for dir in probes.iter() {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(dir)),
            VisitChildrenSet::Recursive
        );
    }

    // Two include matchers
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RootFiles, b"dir");
    let matcher = DifferenceMatcher::new(m1, m2);

    // (directory, expected answer), checked in this order.
    let cases: &[(&[u8], VisitChildrenSet)] = &[
        (b"", children(&[b"dir"])),
        (b"dir", children(&[b"subdir"])),
        (b"dir/subdir", VisitChildrenSet::Recursive),
        (b"dir/foo", VisitChildrenSet::Empty),
        (b"folder", VisitChildrenSet::Empty),
        (b"dir/subdir/z", VisitChildrenSet::This),
        (b"dir/subdir/x", VisitChildrenSet::This),
    ];
    for (dir, expected) in cases {
        assert_eq!(matcher.visit_children_set(HgPath::new(dir)), *expected);
    }
}
1673 1688 }
General Comments 0
You need to be logged in to leave comments. Login now