##// END OF EJS Templates
rust-matchers: better support file_set in IntersectionMatcher...
Spencer Baugh -
r51749:e037af7d default
parent child Browse files
Show More
@@ -1,1753 +1,1759 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use crate::{
11 11 dirstate::dirs_multiset::DirsChildrenMultiset,
12 12 filepatterns::{
13 13 build_single_regex, filter_subincludes, get_patterns_from_file,
14 14 PatternFileWarning, PatternResult,
15 15 },
16 16 utils::{
17 17 files::find_dirs,
18 18 hg_path::{HgPath, HgPathBuf},
19 19 Escaped,
20 20 },
21 21 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
22 22 PatternSyntax,
23 23 };
24 24
25 25 use crate::dirstate::status::IgnoreFnType;
26 26 use crate::filepatterns::normalize_path_bytes;
27 27 use std::borrow::ToOwned;
28 28 use std::collections::HashSet;
29 29 use std::fmt::{Display, Error, Formatter};
30 30 use std::ops::Deref;
31 31 use std::path::{Path, PathBuf};
32 32
33 33 #[derive(Debug, PartialEq)]
34 34 pub enum VisitChildrenSet {
35 35 /// Don't visit anything
36 36 Empty,
37 37 /// Only visit this directory
38 38 This,
39 39 /// Visit this directory and these subdirectories
40 40 /// TODO Should we implement a `NonEmptyHashSet`?
41 41 Set(HashSet<HgPathBuf>),
42 42 /// Visit this directory and all subdirectories
43 43 Recursive,
44 44 }
45 45
46 46 pub trait Matcher: core::fmt::Debug {
47 47 /// Explicitly listed files
48 48 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
49 49 /// Returns whether `filename` is in `file_set`
50 50 fn exact_match(&self, filename: &HgPath) -> bool;
51 51 /// Returns whether `filename` is matched by this matcher
52 52 fn matches(&self, filename: &HgPath) -> bool;
53 53 /// Decides whether a directory should be visited based on whether it
54 54 /// has potential matches in it or one of its subdirectories, and
55 55 /// potentially lists which subdirectories of that directory should be
56 56 /// visited. This is based on the match's primary, included, and excluded
57 57 /// patterns.
58 58 ///
59 59 /// # Example
60 60 ///
61 61 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
62 62 /// return the following values (assuming the implementation of
63 63 /// visit_children_set is capable of recognizing this; some implementations
64 64 /// are not).
65 65 ///
66 66 /// ```text
67 67 /// ```ignore
68 68 /// '' -> {'foo', 'qux'}
69 69 /// 'baz' -> set()
70 70 /// 'foo' -> {'bar'}
71 71 /// // Ideally this would be `Recursive`, but since the prefix nature of
72 72 /// // matchers is applied to the entire matcher, we have to downgrade this
73 73 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
74 74 /// // `RootFilesIn'-kind matcher being mixed in.
75 75 /// 'foo/bar' -> 'this'
76 76 /// 'qux' -> 'this'
77 77 /// ```
78 78 /// # Important
79 79 ///
80 80 /// Most matchers do not know if they're representing files or
81 81 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
82 82 /// file or a directory, so `visit_children_set('dir')` for most matchers
83 83 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
84 84 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
85 85 /// it may return `VisitChildrenSet::This`.
86 86 /// Do not rely on the return being a `HashSet` indicating that there are
87 87 /// no files in this dir to investigate (or equivalently that if there are
88 88 /// files to investigate in 'dir' that it will always return
89 89 /// `VisitChildrenSet::This`).
90 90 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
91 91 /// Matcher will match everything and `files_set()` will be empty:
92 92 /// optimization might be possible.
93 93 fn matches_everything(&self) -> bool;
94 94 /// Matcher will match exactly the files in `files_set()`: optimization
95 95 /// might be possible.
96 96 fn is_exact(&self) -> bool;
97 97 }
98 98
99 99 /// Matches everything.
100 100 ///```
101 101 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
102 102 ///
103 103 /// let matcher = AlwaysMatcher;
104 104 ///
105 105 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
106 106 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
107 107 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
108 108 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
109 109 /// ```
110 110 #[derive(Debug)]
111 111 pub struct AlwaysMatcher;
112 112
113 113 impl Matcher for AlwaysMatcher {
114 114 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
115 115 None
116 116 }
117 117 fn exact_match(&self, _filename: &HgPath) -> bool {
118 118 false
119 119 }
120 120 fn matches(&self, _filename: &HgPath) -> bool {
121 121 true
122 122 }
123 123 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
124 124 VisitChildrenSet::Recursive
125 125 }
126 126 fn matches_everything(&self) -> bool {
127 127 true
128 128 }
129 129 fn is_exact(&self) -> bool {
130 130 false
131 131 }
132 132 }
133 133
134 134 /// Matches nothing.
135 135 #[derive(Debug)]
136 136 pub struct NeverMatcher;
137 137
138 138 impl Matcher for NeverMatcher {
139 139 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
140 140 None
141 141 }
142 142 fn exact_match(&self, _filename: &HgPath) -> bool {
143 143 false
144 144 }
145 145 fn matches(&self, _filename: &HgPath) -> bool {
146 146 false
147 147 }
148 148 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
149 149 VisitChildrenSet::Empty
150 150 }
151 151 fn matches_everything(&self) -> bool {
152 152 false
153 153 }
154 154 fn is_exact(&self) -> bool {
155 155 true
156 156 }
157 157 }
158 158
159 159 /// Matches the input files exactly. They are interpreted as paths, not
160 160 /// patterns.
161 161 ///
162 162 ///```
163 163 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
164 164 ///
165 165 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
166 166 /// let matcher = FileMatcher::new(files).unwrap();
167 167 ///
168 168 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
169 169 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
170 170 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
171 171 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
172 172 /// ```
173 173 #[derive(Debug)]
174 174 pub struct FileMatcher {
175 175 files: HashSet<HgPathBuf>,
176 176 dirs: DirsMultiset,
177 177 }
178 178
179 179 impl FileMatcher {
180 180 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, DirstateMapError> {
181 181 let dirs = DirsMultiset::from_manifest(&files)?;
182 182 Ok(Self {
183 183 files: HashSet::from_iter(files.into_iter()),
184 184 dirs,
185 185 })
186 186 }
187 187 fn inner_matches(&self, filename: &HgPath) -> bool {
188 188 self.files.contains(filename.as_ref())
189 189 }
190 190 }
191 191
192 192 impl Matcher for FileMatcher {
193 193 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
194 194 Some(&self.files)
195 195 }
196 196 fn exact_match(&self, filename: &HgPath) -> bool {
197 197 self.inner_matches(filename)
198 198 }
199 199 fn matches(&self, filename: &HgPath) -> bool {
200 200 self.inner_matches(filename)
201 201 }
202 202 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
203 203 if self.files.is_empty() || !self.dirs.contains(&directory) {
204 204 return VisitChildrenSet::Empty;
205 205 }
206 206 let mut candidates: HashSet<HgPathBuf> =
207 207 self.dirs.iter().cloned().collect();
208 208
209 209 candidates.extend(self.files.iter().cloned());
210 210 candidates.remove(HgPath::new(b""));
211 211
212 212 if !directory.as_ref().is_empty() {
213 213 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
214 214 candidates = candidates
215 215 .iter()
216 216 .filter_map(|c| {
217 217 if c.as_bytes().starts_with(&directory) {
218 218 Some(HgPathBuf::from_bytes(
219 219 &c.as_bytes()[directory.len()..],
220 220 ))
221 221 } else {
222 222 None
223 223 }
224 224 })
225 225 .collect();
226 226 }
227 227
228 228 // `self.dirs` includes all of the directories, recursively, so if
229 229 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
230 230 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
231 231 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
232 232 // subdir will be in there without a slash.
233 233 VisitChildrenSet::Set(
234 234 candidates
235 235 .into_iter()
236 236 .filter_map(|c| {
237 237 if c.bytes().all(|b| *b != b'/') {
238 238 Some(c)
239 239 } else {
240 240 None
241 241 }
242 242 })
243 243 .collect(),
244 244 )
245 245 }
246 246 fn matches_everything(&self) -> bool {
247 247 false
248 248 }
249 249 fn is_exact(&self) -> bool {
250 250 true
251 251 }
252 252 }
253 253
254 254 /// Matches files that are included in the ignore rules.
255 255 /// ```
256 256 /// use hg::{
257 257 /// matchers::{IncludeMatcher, Matcher},
258 258 /// IgnorePattern,
259 259 /// PatternSyntax,
260 260 /// utils::hg_path::HgPath
261 261 /// };
262 262 /// use std::path::Path;
263 263 /// ///
264 264 /// let ignore_patterns =
265 265 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
266 266 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
267 267 /// ///
268 268 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
269 269 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
270 270 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
271 271 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
272 272 /// ```
273 273 pub struct IncludeMatcher<'a> {
274 274 patterns: Vec<u8>,
275 275 match_fn: IgnoreFnType<'a>,
276 276 /// Whether all the patterns match a prefix (i.e. recursively)
277 277 prefix: bool,
278 278 roots: HashSet<HgPathBuf>,
279 279 dirs: HashSet<HgPathBuf>,
280 280 parents: HashSet<HgPathBuf>,
281 281 }
282 282
283 283 impl core::fmt::Debug for IncludeMatcher<'_> {
284 284 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
285 285 f.debug_struct("IncludeMatcher")
286 286 .field("patterns", &String::from_utf8_lossy(&self.patterns))
287 287 .field("prefix", &self.prefix)
288 288 .field("roots", &self.roots)
289 289 .field("dirs", &self.dirs)
290 290 .field("parents", &self.parents)
291 291 .finish()
292 292 }
293 293 }
294 294
295 295 impl<'a> Matcher for IncludeMatcher<'a> {
296 296 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
297 297 None
298 298 }
299 299
300 300 fn exact_match(&self, _filename: &HgPath) -> bool {
301 301 false
302 302 }
303 303
304 304 fn matches(&self, filename: &HgPath) -> bool {
305 305 (self.match_fn)(filename)
306 306 }
307 307
308 308 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
309 309 let dir = directory;
310 310 if self.prefix && self.roots.contains(dir) {
311 311 return VisitChildrenSet::Recursive;
312 312 }
313 313 if self.roots.contains(HgPath::new(b""))
314 314 || self.roots.contains(dir)
315 315 || self.dirs.contains(dir)
316 316 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
317 317 {
318 318 return VisitChildrenSet::This;
319 319 }
320 320
321 321 if self.parents.contains(dir.as_ref()) {
322 322 let multiset = self.get_all_parents_children();
323 323 if let Some(children) = multiset.get(dir) {
324 324 return VisitChildrenSet::Set(
325 325 children.iter().map(HgPathBuf::from).collect(),
326 326 );
327 327 }
328 328 }
329 329 VisitChildrenSet::Empty
330 330 }
331 331
332 332 fn matches_everything(&self) -> bool {
333 333 false
334 334 }
335 335
336 336 fn is_exact(&self) -> bool {
337 337 false
338 338 }
339 339 }
340 340
341 341 /// The union of multiple matchers. Will match if any of the matchers match.
342 342 #[derive(Debug)]
343 343 pub struct UnionMatcher {
344 344 matchers: Vec<Box<dyn Matcher + Sync>>,
345 345 }
346 346
347 347 impl Matcher for UnionMatcher {
348 348 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
349 349 None
350 350 }
351 351
352 352 fn exact_match(&self, _filename: &HgPath) -> bool {
353 353 false
354 354 }
355 355
356 356 fn matches(&self, filename: &HgPath) -> bool {
357 357 self.matchers.iter().any(|m| m.matches(filename))
358 358 }
359 359
360 360 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
361 361 let mut result = HashSet::new();
362 362 let mut this = false;
363 363 for matcher in self.matchers.iter() {
364 364 let visit = matcher.visit_children_set(directory);
365 365 match visit {
366 366 VisitChildrenSet::Empty => continue,
367 367 VisitChildrenSet::This => {
368 368 this = true;
369 369 // Don't break, we might have an 'all' in here.
370 370 continue;
371 371 }
372 372 VisitChildrenSet::Set(set) => {
373 373 result.extend(set);
374 374 }
375 375 VisitChildrenSet::Recursive => {
376 376 return visit;
377 377 }
378 378 }
379 379 }
380 380 if this {
381 381 return VisitChildrenSet::This;
382 382 }
383 383 if result.is_empty() {
384 384 VisitChildrenSet::Empty
385 385 } else {
386 386 VisitChildrenSet::Set(result)
387 387 }
388 388 }
389 389
390 390 fn matches_everything(&self) -> bool {
391 391 // TODO Maybe if all are AlwaysMatcher?
392 392 false
393 393 }
394 394
395 395 fn is_exact(&self) -> bool {
396 396 false
397 397 }
398 398 }
399 399
400 400 impl UnionMatcher {
401 401 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
402 402 Self { matchers }
403 403 }
404 404 }
405 405
406 406 #[derive(Debug)]
407 407 pub struct IntersectionMatcher {
408 408 m1: Box<dyn Matcher + Sync>,
409 409 m2: Box<dyn Matcher + Sync>,
410 410 files: Option<HashSet<HgPathBuf>>,
411 411 }
412 412
413 413 impl Matcher for IntersectionMatcher {
414 414 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
415 415 self.files.as_ref()
416 416 }
417 417
418 418 fn exact_match(&self, filename: &HgPath) -> bool {
419 419 self.files.as_ref().map_or(false, |f| f.contains(filename))
420 420 }
421 421
422 422 fn matches(&self, filename: &HgPath) -> bool {
423 423 self.m1.matches(filename) && self.m2.matches(filename)
424 424 }
425 425
426 426 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
427 427 let m1_set = self.m1.visit_children_set(directory);
428 428 if m1_set == VisitChildrenSet::Empty {
429 429 return VisitChildrenSet::Empty;
430 430 }
431 431 let m2_set = self.m2.visit_children_set(directory);
432 432 if m2_set == VisitChildrenSet::Empty {
433 433 return VisitChildrenSet::Empty;
434 434 }
435 435
436 436 if m1_set == VisitChildrenSet::Recursive {
437 437 return m2_set;
438 438 } else if m2_set == VisitChildrenSet::Recursive {
439 439 return m1_set;
440 440 }
441 441
442 442 match (&m1_set, &m2_set) {
443 443 (VisitChildrenSet::Recursive, _) => m2_set,
444 444 (_, VisitChildrenSet::Recursive) => m1_set,
445 445 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
446 446 VisitChildrenSet::This
447 447 }
448 448 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
449 449 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
450 450 if set.is_empty() {
451 451 VisitChildrenSet::Empty
452 452 } else {
453 453 VisitChildrenSet::Set(set)
454 454 }
455 455 }
456 456 _ => unreachable!(),
457 457 }
458 458 }
459 459
460 460 fn matches_everything(&self) -> bool {
461 461 self.m1.matches_everything() && self.m2.matches_everything()
462 462 }
463 463
464 464 fn is_exact(&self) -> bool {
465 465 self.m1.is_exact() || self.m2.is_exact()
466 466 }
467 467 }
468 468
469 469 impl IntersectionMatcher {
470 470 pub fn new(
471 471 mut m1: Box<dyn Matcher + Sync>,
472 472 mut m2: Box<dyn Matcher + Sync>,
473 473 ) -> Self {
474 474 let files = if m1.is_exact() || m2.is_exact() {
475 475 if !m1.is_exact() {
476 476 std::mem::swap(&mut m1, &mut m2);
477 477 }
478 478 m1.file_set().map(|m1_files| {
479 479 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
480 480 })
481 481 } else {
482 None
482 // without exact input file sets, we can't do an exact
483 // intersection, so we must over-approximate by
484 // unioning instead
485 m1.file_set().map(|m1_files| match m2.file_set() {
486 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
487 None => m1_files.iter().cloned().collect(),
488 })
483 489 };
484 490 Self { m1, m2, files }
485 491 }
486 492 }
487 493
488 494 #[derive(Debug)]
489 495 pub struct DifferenceMatcher {
490 496 base: Box<dyn Matcher + Sync>,
491 497 excluded: Box<dyn Matcher + Sync>,
492 498 files: Option<HashSet<HgPathBuf>>,
493 499 }
494 500
495 501 impl Matcher for DifferenceMatcher {
496 502 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
497 503 self.files.as_ref()
498 504 }
499 505
500 506 fn exact_match(&self, filename: &HgPath) -> bool {
501 507 self.files.as_ref().map_or(false, |f| f.contains(filename))
502 508 }
503 509
504 510 fn matches(&self, filename: &HgPath) -> bool {
505 511 self.base.matches(filename) && !self.excluded.matches(filename)
506 512 }
507 513
508 514 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
509 515 let excluded_set = self.excluded.visit_children_set(directory);
510 516 if excluded_set == VisitChildrenSet::Recursive {
511 517 return VisitChildrenSet::Empty;
512 518 }
513 519 let base_set = self.base.visit_children_set(directory);
514 520 // Possible values for base: 'recursive', 'this', set(...), set()
515 521 // Possible values for excluded: 'this', set(...), set()
516 522 // If excluded has nothing under here that we care about, return base,
517 523 // even if it's 'recursive'.
518 524 if excluded_set == VisitChildrenSet::Empty {
519 525 return base_set;
520 526 }
521 527 match base_set {
522 528 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
523 529 // Never return 'recursive' here if excluded_set is any kind of
524 530 // non-empty (either 'this' or set(foo)), since excluded might
525 531 // return set() for a subdirectory.
526 532 VisitChildrenSet::This
527 533 }
528 534 set => {
529 535 // Possible values for base: set(...), set()
530 536 // Possible values for excluded: 'this', set(...)
531 537 // We ignore excluded set results. They're possibly incorrect:
532 538 // base = path:dir/subdir
533 539 // excluded=rootfilesin:dir,
534 540 // visit_children_set(''):
535 541 // base returns {'dir'}, excluded returns {'dir'}, if we
536 542 // subtracted we'd return set(), which is *not* correct, we
537 543 // still need to visit 'dir'!
538 544 set
539 545 }
540 546 }
541 547 }
542 548
543 549 fn matches_everything(&self) -> bool {
544 550 false
545 551 }
546 552
547 553 fn is_exact(&self) -> bool {
548 554 self.base.is_exact()
549 555 }
550 556 }
551 557
552 558 impl DifferenceMatcher {
553 559 pub fn new(
554 560 base: Box<dyn Matcher + Sync>,
555 561 excluded: Box<dyn Matcher + Sync>,
556 562 ) -> Self {
557 563 let base_is_exact = base.is_exact();
558 564 let base_files = base.file_set().map(ToOwned::to_owned);
559 565 let mut new = Self {
560 566 base,
561 567 excluded,
562 568 files: None,
563 569 };
564 570 if base_is_exact {
565 571 new.files = base_files.map(|files| {
566 572 files.iter().cloned().filter(|f| new.matches(f)).collect()
567 573 });
568 574 }
569 575 new
570 576 }
571 577 }
572 578
573 579 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
574 580 /// contexts.
575 581 ///
576 582 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
577 583 /// from many threads at once is prone to contention, probably within the
578 584 /// scratch space needed as the regex DFA is built lazily.
579 585 ///
580 586 /// We are in the process of raising the issue upstream, but for now
581 587 /// the workaround used here is to store the `Regex` in a lazily populated
582 588 /// thread-local variable, sharing the initial read-only compilation, but
583 589 /// not the lazy dfa scratch space mentioned above.
584 590 ///
585 591 /// This reduces the contention observed with 16+ threads, but does not
586 592 /// completely remove it. Hopefully this can be addressed upstream.
587 593 struct RegexMatcher {
588 594 /// Compiled at the start of the status algorithm, used as a base for
589 595 /// cloning in each thread-local `self.local`, thus sharing the expensive
590 596 /// first compilation.
591 597 base: regex::bytes::Regex,
592 598 /// Thread-local variable that holds the `Regex` that is actually queried
593 599 /// from each thread.
594 600 local: thread_local::ThreadLocal<regex::bytes::Regex>,
595 601 }
596 602
597 603 impl RegexMatcher {
598 604 /// Returns whether the path matches the stored `Regex`.
599 605 pub fn is_match(&self, path: &HgPath) -> bool {
600 606 self.local
601 607 .get_or(|| self.base.clone())
602 608 .is_match(path.as_bytes())
603 609 }
604 610 }
605 611
606 612 /// Returns a function that matches an `HgPath` against the given regex
607 613 /// pattern.
608 614 ///
609 615 /// This can fail when the pattern is invalid or not supported by the
610 616 /// underlying engine (the `regex` crate), for instance anything with
611 617 /// back-references.
612 618 #[logging_timer::time("trace")]
613 619 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
614 620 use std::io::Write;
615 621
616 622 // The `regex` crate adds `.*` to the start and end of expressions if there
617 623 // are no anchors, so add the start anchor.
618 624 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
619 625 for byte in pattern {
620 626 if *byte > 127 {
621 627 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
622 628 } else {
623 629 escaped_bytes.push(*byte);
624 630 }
625 631 }
626 632 escaped_bytes.push(b')');
627 633
628 634 // Avoid the cost of UTF8 checking
629 635 //
630 636 // # Safety
631 637 // This is safe because we escaped all non-ASCII bytes.
632 638 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
633 639 let re = regex::bytes::RegexBuilder::new(&pattern_string)
634 640 .unicode(false)
635 641 // Big repos with big `.hgignore` will hit the default limit and
636 642 // incur a significant performance hit. One repo's `hg status` hit
637 643 // multiple *minutes*.
638 644 .dfa_size_limit(50 * (1 << 20))
639 645 .build()
640 646 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
641 647
642 648 Ok(RegexMatcher {
643 649 base: re,
644 650 local: Default::default(),
645 651 })
646 652 }
647 653
648 654 /// Returns the regex pattern and a function that matches an `HgPath` against
649 655 /// said regex formed by the given ignore patterns.
650 656 fn build_regex_match<'a, 'b>(
651 657 ignore_patterns: &'a [IgnorePattern],
652 658 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
653 659 let mut regexps = vec![];
654 660 let mut exact_set = HashSet::new();
655 661
656 662 for pattern in ignore_patterns {
657 663 if let Some(re) = build_single_regex(pattern)? {
658 664 regexps.push(re);
659 665 } else {
660 666 let exact = normalize_path_bytes(&pattern.pattern);
661 667 exact_set.insert(HgPathBuf::from_bytes(&exact));
662 668 }
663 669 }
664 670
665 671 let full_regex = regexps.join(&b'|');
666 672
667 673 // An empty pattern would cause the regex engine to incorrectly match the
668 674 // (empty) root directory
669 675 let func = if !(regexps.is_empty()) {
670 676 let matcher = re_matcher(&full_regex)?;
671 677 let func = move |filename: &HgPath| {
672 678 exact_set.contains(filename) || matcher.is_match(filename)
673 679 };
674 680 Box::new(func) as IgnoreFnType
675 681 } else {
676 682 let func = move |filename: &HgPath| exact_set.contains(filename);
677 683 Box::new(func) as IgnoreFnType
678 684 };
679 685
680 686 Ok((full_regex, func))
681 687 }
682 688
683 689 /// Returns roots and directories corresponding to each pattern.
684 690 ///
685 691 /// This calculates the roots and directories exactly matching the patterns and
686 692 /// returns a tuple of (roots, dirs). It does not return other directories
687 693 /// which may also need to be considered, like the parent directories.
688 694 fn roots_and_dirs(
689 695 ignore_patterns: &[IgnorePattern],
690 696 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
691 697 let mut roots = Vec::new();
692 698 let mut dirs = Vec::new();
693 699
694 700 for ignore_pattern in ignore_patterns {
695 701 let IgnorePattern {
696 702 syntax, pattern, ..
697 703 } = ignore_pattern;
698 704 match syntax {
699 705 PatternSyntax::RootGlob | PatternSyntax::Glob => {
700 706 let mut root = HgPathBuf::new();
701 707 for p in pattern.split(|c| *c == b'/') {
702 708 if p.iter()
703 709 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
704 710 {
705 711 break;
706 712 }
707 713 root.push(HgPathBuf::from_bytes(p).as_ref());
708 714 }
709 715 roots.push(root);
710 716 }
711 717 PatternSyntax::Path
712 718 | PatternSyntax::RelPath
713 719 | PatternSyntax::FilePath => {
714 720 let pat = HgPath::new(if pattern == b"." {
715 721 &[] as &[u8]
716 722 } else {
717 723 pattern
718 724 });
719 725 roots.push(pat.to_owned());
720 726 }
721 727 PatternSyntax::RootFiles => {
722 728 let pat = if pattern == b"." {
723 729 &[] as &[u8]
724 730 } else {
725 731 pattern
726 732 };
727 733 dirs.push(HgPathBuf::from_bytes(pat));
728 734 }
729 735 _ => {
730 736 roots.push(HgPathBuf::new());
731 737 }
732 738 }
733 739 }
734 740 (roots, dirs)
735 741 }
736 742
737 743 /// Paths extracted from patterns
738 744 #[derive(Debug, PartialEq)]
739 745 struct RootsDirsAndParents {
740 746 /// Directories to match recursively
741 747 pub roots: HashSet<HgPathBuf>,
742 748 /// Directories to match non-recursively
743 749 pub dirs: HashSet<HgPathBuf>,
744 750 /// Implicitly required directories to go to items in either roots or dirs
745 751 pub parents: HashSet<HgPathBuf>,
746 752 }
747 753
748 754 /// Extract roots, dirs and parents from patterns.
749 755 fn roots_dirs_and_parents(
750 756 ignore_patterns: &[IgnorePattern],
751 757 ) -> PatternResult<RootsDirsAndParents> {
752 758 let (roots, dirs) = roots_and_dirs(ignore_patterns);
753 759
754 760 let mut parents = HashSet::new();
755 761
756 762 parents.extend(
757 763 DirsMultiset::from_manifest(&dirs)
758 764 .map_err(|e| match e {
759 765 DirstateMapError::InvalidPath(e) => e,
760 766 _ => unreachable!(),
761 767 })?
762 768 .iter()
763 769 .map(ToOwned::to_owned),
764 770 );
765 771 parents.extend(
766 772 DirsMultiset::from_manifest(&roots)
767 773 .map_err(|e| match e {
768 774 DirstateMapError::InvalidPath(e) => e,
769 775 _ => unreachable!(),
770 776 })?
771 777 .iter()
772 778 .map(ToOwned::to_owned),
773 779 );
774 780
775 781 Ok(RootsDirsAndParents {
776 782 roots: HashSet::from_iter(roots),
777 783 dirs: HashSet::from_iter(dirs),
778 784 parents,
779 785 })
780 786 }
781 787
782 788 /// Returns a function that checks whether a given file (in the general sense)
783 789 /// should be matched.
784 790 fn build_match<'a>(
785 791 ignore_patterns: Vec<IgnorePattern>,
786 792 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
787 793 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
788 794 // For debugging and printing
789 795 let mut patterns = vec![];
790 796
791 797 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
792 798
793 799 if !subincludes.is_empty() {
794 800 // Build prefix-based matcher functions for subincludes
795 801 let mut submatchers = FastHashMap::default();
796 802 let mut prefixes = vec![];
797 803
798 804 for sub_include in subincludes {
799 805 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
800 806 let match_fn =
801 807 Box::new(move |path: &HgPath| matcher.matches(path));
802 808 prefixes.push(sub_include.prefix.clone());
803 809 submatchers.insert(sub_include.prefix.clone(), match_fn);
804 810 }
805 811
806 812 let match_subinclude = move |filename: &HgPath| {
807 813 for prefix in prefixes.iter() {
808 814 if let Some(rel) = filename.relative_to(prefix) {
809 815 if (submatchers[prefix])(rel) {
810 816 return true;
811 817 }
812 818 }
813 819 }
814 820 false
815 821 };
816 822
817 823 match_funcs.push(Box::new(match_subinclude));
818 824 }
819 825
820 826 if !ignore_patterns.is_empty() {
821 827 // Either do dumb matching if all patterns are rootfiles, or match
822 828 // with a regex.
823 829 if ignore_patterns
824 830 .iter()
825 831 .all(|k| k.syntax == PatternSyntax::RootFiles)
826 832 {
827 833 let dirs: HashSet<_> = ignore_patterns
828 834 .iter()
829 835 .map(|k| k.pattern.to_owned())
830 836 .collect();
831 837 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
832 838
833 839 let match_func = move |path: &HgPath| -> bool {
834 840 let path = path.as_bytes();
835 841 let i = path.iter().rfind(|a| **a == b'/');
836 842 let dir = if let Some(i) = i {
837 843 &path[..*i as usize]
838 844 } else {
839 845 b"."
840 846 };
841 847 dirs.contains(dir.deref())
842 848 };
843 849 match_funcs.push(Box::new(match_func));
844 850
845 851 patterns.extend(b"rootfilesin: ");
846 852 dirs_vec.sort();
847 853 patterns.extend(dirs_vec.escaped_bytes());
848 854 } else {
849 855 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
850 856 patterns = new_re;
851 857 match_funcs.push(match_func)
852 858 }
853 859 }
854 860
855 861 Ok(if match_funcs.len() == 1 {
856 862 (patterns, match_funcs.remove(0))
857 863 } else {
858 864 (
859 865 patterns,
860 866 Box::new(move |f: &HgPath| -> bool {
861 867 match_funcs.iter().any(|match_func| match_func(f))
862 868 }),
863 869 )
864 870 })
865 871 }
866 872
867 873 /// Parses all "ignore" files with their recursive includes and returns a
868 874 /// function that checks whether a given file (in the general sense) should be
869 875 /// ignored.
870 876 pub fn get_ignore_matcher<'a>(
871 877 mut all_pattern_files: Vec<PathBuf>,
872 878 root_dir: &Path,
873 879 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
874 880 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
875 881 let mut all_patterns = vec![];
876 882 let mut all_warnings = vec![];
877 883
878 884 // Sort to make the ordering of calls to `inspect_pattern_bytes`
879 885 // deterministic even if the ordering of `all_pattern_files` is not (such
880 886 // as when a iteration order of a Python dict or Rust HashMap is involved).
881 887 // Sort by "string" representation instead of the default by component
882 888 // (with a Rust-specific definition of a component)
883 889 all_pattern_files
884 890 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
885 891
886 892 for pattern_file in &all_pattern_files {
887 893 let (patterns, warnings) = get_patterns_from_file(
888 894 pattern_file,
889 895 root_dir,
890 896 inspect_pattern_bytes,
891 897 )?;
892 898
893 899 all_patterns.extend(patterns.to_owned());
894 900 all_warnings.extend(warnings);
895 901 }
896 902 let matcher = IncludeMatcher::new(all_patterns)?;
897 903 Ok((matcher, all_warnings))
898 904 }
899 905
900 906 /// Parses all "ignore" files with their recursive includes and returns a
901 907 /// function that checks whether a given file (in the general sense) should be
902 908 /// ignored.
903 909 pub fn get_ignore_function<'a>(
904 910 all_pattern_files: Vec<PathBuf>,
905 911 root_dir: &Path,
906 912 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
907 913 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
908 914 let res =
909 915 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
910 916 res.map(|(matcher, all_warnings)| {
911 917 let res: IgnoreFnType<'a> =
912 918 Box::new(move |path: &HgPath| matcher.matches(path));
913 919
914 920 (res, all_warnings)
915 921 })
916 922 }
917 923
918 924 impl<'a> IncludeMatcher<'a> {
919 925 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
920 926 let RootsDirsAndParents {
921 927 roots,
922 928 dirs,
923 929 parents,
924 930 } = roots_dirs_and_parents(&ignore_patterns)?;
925 931 let prefix = ignore_patterns.iter().all(|k| {
926 932 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
927 933 });
928 934 let (patterns, match_fn) = build_match(ignore_patterns)?;
929 935
930 936 Ok(Self {
931 937 patterns,
932 938 match_fn,
933 939 prefix,
934 940 roots,
935 941 dirs,
936 942 parents,
937 943 })
938 944 }
939 945
940 946 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
941 947 // TODO cache
942 948 let thing = self
943 949 .dirs
944 950 .iter()
945 951 .chain(self.roots.iter())
946 952 .chain(self.parents.iter());
947 953 DirsChildrenMultiset::new(thing, Some(&self.parents))
948 954 }
949 955
950 956 pub fn debug_get_patterns(&self) -> &[u8] {
951 957 self.patterns.as_ref()
952 958 }
953 959 }
954 960
955 961 impl<'a> Display for IncludeMatcher<'a> {
956 962 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
957 963 // XXX What about exact matches?
958 964 // I'm not sure it's worth it to clone the HashSet and keep it
959 965 // around just in case someone wants to display the matcher, plus
960 966 // it's going to be unreadable after a few entries, but we need to
961 967 // inform in this display that exact matches are being used and are
962 968 // (on purpose) missing from the `includes`.
963 969 write!(
964 970 f,
965 971 "IncludeMatcher(includes='{}')",
966 972 String::from_utf8_lossy(&self.patterns.escaped_bytes())
967 973 )
968 974 }
969 975 }
970 976
971 977 #[cfg(test)]
972 978 mod tests {
973 979 use super::*;
974 980 use pretty_assertions::assert_eq;
975 981 use std::path::Path;
976 982
#[test]
fn test_roots_and_dirs() {
    // Three glob patterns, all declared with an empty source path.
    let pats: Vec<IgnorePattern> = [&b"g/h/*"[..], b"g/h", b"g*"]
        .iter()
        .map(|glob| {
            IgnorePattern::new(PatternSyntax::Glob, glob, Path::new(""))
        })
        .collect();

    let (roots, dirs) = roots_and_dirs(&pats);

    // One root per pattern, in pattern order; `g*` yields the empty path.
    let expected_roots = vec![
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::new(),
    ];
    assert_eq!(roots, expected_roots);
    assert_eq!(dirs, vec![]);
}
996 1002
#[test]
fn test_roots_dirs_and_parents() {
    // Same three glob patterns as in `test_roots_and_dirs`.
    let pats: Vec<IgnorePattern> = [&b"g/h/*"[..], b"g/h", b"g*"]
        .iter()
        .map(|glob| {
            IgnorePattern::new(PatternSyntax::Glob, glob, Path::new(""))
        })
        .collect();

    // Sets, unlike the vectors of `roots_and_dirs`, hold `g/h` only once.
    let expected = RootsDirsAndParents {
        roots: vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
            .into_iter()
            .collect(),
        dirs: HashSet::new(),
        parents: vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
            .into_iter()
            .collect(),
    };

    assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
}
1024 1030
#[test]
fn test_filematcher_visit_children_set() {
    // Expected `Set` result made of the given child names.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Visitchildrenset
    let matcher =
        FileMatcher::new(vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")])
            .unwrap();
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    // Each ancestor of the file only leads to the next path component.
    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), children(&[b"foo.txt"]));

    // Anything else, including the file path itself, has nothing to visit.
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/foo.txt"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
}
1065 1071
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
    // Expected `Set` result made of the given child names.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    let files: Vec<HgPathBuf> = [
        &b"rootfile.txt"[..],
        b"a/file1.txt",
        b"a/b/file2.txt",
        // No file in a/b/c
        b"a/b/c/d/file4.txt",
    ]
    .iter()
    .map(|file| HgPathBuf::from_bytes(file))
    .collect();
    let matcher = FileMatcher::new(files).unwrap();
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    // Directories holding both a listed file and a deeper listed path
    // report the two of them.
    assert_eq!(visit(b""), children(&[b"a", b"rootfile.txt"]));
    assert_eq!(visit(b"a"), children(&[b"b", b"file1.txt"]));
    assert_eq!(visit(b"a/b"), children(&[b"c", b"file2.txt"]));

    // `a/b/c` has no file of its own, only the way down to `d`.
    assert_eq!(visit(b"a/b/c"), children(&[b"d"]));
    assert_eq!(visit(b"a/b/c/d"), children(&[b"file4.txt"]));

    assert_eq!(visit(b"a/b/c/d/e"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
}
1123 1129
#[test]
fn test_includematcher() {
    // Matcher made of a single pattern with an empty source path.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        let pattern = IgnorePattern::new(syntax, pattern, Path::new(""));
        IncludeMatcher::new(vec![pattern]).unwrap()
    }

    // Expected `Set` result made of the given child names.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // VisitchildrensetPrefix
    let matcher = include(PatternSyntax::RelPath, b"dir/subdir");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    // OPT: This should probably be 'all' if its parent is?
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // VisitchildrensetRootfilesin
    let matcher = include(PatternSyntax::RootFiles, b"dir/subdir");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // VisitchildrensetGlob
    let matcher = include(PatternSyntax::Glob, b"dir/z*");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    // OPT: these two could probably be pruned (empty set) instead.
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // VisitchildrensetFilePath
    let matcher = include(PatternSyntax::FilePath, b"dir/z");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), children(&[b"z"]));
    // Directories below `dir` are expected to be pruned.
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);

    // Test multiple patterns
    let matcher = IncludeMatcher::new(vec![
        IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
        IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
    ])
    .unwrap();
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), VisitChildrenSet::This);

    // Test a glob that starts with `**`
    let matcher = include(PatternSyntax::Glob, b"**/*.exe");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), VisitChildrenSet::This);
}
1287 1293
#[test]
fn test_unionmatcher() {
    // Matcher made of a single pattern with an empty source path.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        let pattern = IgnorePattern::new(syntax, pattern, Path::new(""));
        IncludeMatcher::new(vec![pattern]).unwrap()
    }

    // Expected `Set` result made of the given child names.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Path + Rootfiles
    let by_path = include(PatternSyntax::RelPath, b"dir/subdir");
    let by_rootfiles = include(PatternSyntax::RootFiles, b"dir");
    let matcher =
        UnionMatcher::new(vec![Box::new(by_path), Box::new(by_rootfiles)]);
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    // `folder` is checked twice in a row.
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // OPT: These next two could be 'all' instead of 'this'.
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // Path + unrelated Path
    let by_path = include(PatternSyntax::RelPath, b"dir/subdir");
    let unrelated = include(PatternSyntax::RelPath, b"folder");
    let matcher =
        UnionMatcher::new(vec![Box::new(by_path), Box::new(unrelated)]);
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"folder", b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Recursive);
    // OPT: These next two could be 'all' instead of 'this'.
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // Path + subpath
    let subpath = include(PatternSyntax::RelPath, b"dir/subdir/x");
    let by_path = include(PatternSyntax::RelPath, b"dir/subdir");
    let matcher =
        UnionMatcher::new(vec![Box::new(subpath), Box::new(by_path)]);
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Recursive);
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
}
1445 1451
#[test]
fn test_intersectionmatcher() {
    // Matcher made of a single pattern with an empty source path.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        let pattern = IgnorePattern::new(syntax, pattern, Path::new(""));
        IncludeMatcher::new(vec![pattern]).unwrap()
    }

    // Expected `Set` result made of the given child names.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Include path + Include rootfiles
    let matcher = IntersectionMatcher::new(
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir")),
        Box::new(include(PatternSyntax::RootFiles, b"dir")),
    );
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);

    // Non intersecting paths
    let matcher = IntersectionMatcher::new(
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir")),
        Box::new(include(PatternSyntax::RelPath, b"folder")),
    );
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    // Nothing is worth visiting, starting from the root itself.
    assert_eq!(visit(b""), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);

    // Nested paths
    let matcher = IntersectionMatcher::new(
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir/x")),
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir")),
    );
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), children(&[b"x"]));
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::Empty);
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // Diverging paths
    let matcher = IntersectionMatcher::new(
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir/x")),
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir/z")),
    );
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    // OPT: these next two could probably be Empty as well.
    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));

    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
}
1656 1662
#[test]
fn test_differencematcher() {
    // Directories probed against the always/never combinations below.
    const DIRS: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/subdir/z",
        b"dir/foo",
        b"dir/subdir/x",
        b"folder",
    ];

    // Matcher made of a single pattern declared in `/repo`.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        let pattern = IgnorePattern::new(syntax, pattern, Path::new("/repo"));
        IncludeMatcher::new(vec![pattern]).unwrap()
    }

    // Expected `Set` result made of the given child names.
    fn children(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Two alwaysmatchers should function like a nevermatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(AlwaysMatcher),
    );
    for dir in DIRS.iter() {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(dir)),
            VisitChildrenSet::Empty
        );
    }

    // One always and one never should behave the same as an always
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(NeverMatcher),
    );
    for dir in DIRS.iter() {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(dir)),
            VisitChildrenSet::Recursive
        );
    }

    // Two include matchers
    let matcher = DifferenceMatcher::new(
        Box::new(include(PatternSyntax::RelPath, b"dir/subdir")),
        Box::new(include(PatternSyntax::RootFiles, b"dir")),
    );
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    assert_eq!(visit(b""), children(&[b"dir"]));
    assert_eq!(visit(b"dir"), children(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);
}
1753 1759 }
General Comments 0
You need to be logged in to leave comments. Login now