##// END OF EJS Templates
rust-matchers: add TODO about incomplete `Display` for `IncludeMatcher`...
Raphaël Gomès -
r45312:de0fb446 default
parent child Browse files
Show More
@@ -1,948 +1,954 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 #[cfg(feature = "with-re2")]
11 11 use crate::re2::Re2;
12 12 use crate::{
13 13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 14 filepatterns::{
15 15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 16 PatternFileWarning, PatternResult, SubInclude,
17 17 },
18 18 utils::{
19 19 files::find_dirs,
20 20 hg_path::{HgPath, HgPathBuf},
21 21 Escaped,
22 22 },
23 23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 24 PatternSyntax,
25 25 };
26 26
27 27 use crate::filepatterns::normalize_path_bytes;
28 28 use std::borrow::ToOwned;
29 29 use std::collections::HashSet;
30 30 use std::fmt::{Display, Error, Formatter};
31 31 use std::iter::FromIterator;
32 32 use std::ops::Deref;
33 33 use std::path::{Path, PathBuf};
34 34
35 35 use micro_timer::timed;
36 36
37 37 #[derive(Debug, PartialEq)]
38 38 pub enum VisitChildrenSet<'a> {
39 39 /// Don't visit anything
40 40 Empty,
41 41 /// Only visit this directory
42 42 This,
43 43 /// Visit this directory and these subdirectories
44 44 /// TODO Should we implement a `NonEmptyHashSet`?
45 45 Set(HashSet<&'a HgPath>),
46 46 /// Visit this directory and all subdirectories
47 47 Recursive,
48 48 }
49 49
50 50 pub trait Matcher {
51 51 /// Explicitly listed files
52 52 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
53 53 /// Returns whether `filename` is in `file_set`
54 54 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
55 55 /// Returns whether `filename` is matched by this matcher
56 56 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
57 57 /// Decides whether a directory should be visited based on whether it
58 58 /// has potential matches in it or one of its subdirectories, and
59 59 /// potentially lists which subdirectories of that directory should be
60 60 /// visited. This is based on the match's primary, included, and excluded
61 61 /// patterns.
62 62 ///
63 63 /// # Example
64 64 ///
65 65 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
66 66 /// return the following values (assuming the implementation of
67 67 /// visit_children_set is capable of recognizing this; some implementations
68 68 /// are not).
69 69 ///
70 70 /// ```text
71 71 /// ```ignore
72 72 /// '' -> {'foo', 'qux'}
73 73 /// 'baz' -> set()
74 74 /// 'foo' -> {'bar'}
75 75 /// // Ideally this would be `Recursive`, but since the prefix nature of
76 76 /// // matchers is applied to the entire matcher, we have to downgrade this
77 77 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
78 78 /// // `RootFilesIn'-kind matcher being mixed in.
79 79 /// 'foo/bar' -> 'this'
80 80 /// 'qux' -> 'this'
81 81 /// ```
82 82 /// # Important
83 83 ///
84 84 /// Most matchers do not know if they're representing files or
85 85 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
86 86 /// file or a directory, so `visit_children_set('dir')` for most matchers
87 87 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
88 88 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
89 89 /// it may return `VisitChildrenSet::This`.
90 90 /// Do not rely on the return being a `HashSet` indicating that there are
91 91 /// no files in this dir to investigate (or equivalently that if there are
92 92 /// files to investigate in 'dir' that it will always return
93 93 /// `VisitChildrenSet::This`).
94 94 fn visit_children_set(
95 95 &self,
96 96 directory: impl AsRef<HgPath>,
97 97 ) -> VisitChildrenSet;
98 98 /// Matcher will match everything and `files_set()` will be empty:
99 99 /// optimization might be possible.
100 100 fn matches_everything(&self) -> bool;
101 101 /// Matcher will match exactly the files in `files_set()`: optimization
102 102 /// might be possible.
103 103 fn is_exact(&self) -> bool;
104 104 }
105 105
/// A matcher that accepts every path it is given.
///
/// ```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
119 119
120 120 impl Matcher for AlwaysMatcher {
121 121 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
122 122 None
123 123 }
124 124 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
125 125 false
126 126 }
127 127 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
128 128 true
129 129 }
130 130 fn visit_children_set(
131 131 &self,
132 132 _directory: impl AsRef<HgPath>,
133 133 ) -> VisitChildrenSet {
134 134 VisitChildrenSet::Recursive
135 135 }
136 136 fn matches_everything(&self) -> bool {
137 137 true
138 138 }
139 139 fn is_exact(&self) -> bool {
140 140 false
141 141 }
142 142 }
143 143
144 144 /// Matches the input files exactly. They are interpreted as paths, not
145 145 /// patterns.
146 146 ///
147 147 ///```
148 148 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
149 149 ///
150 150 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
151 151 /// let matcher = FileMatcher::new(&files).unwrap();
152 152 ///
153 153 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
154 154 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
155 155 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
156 156 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
157 157 /// ```
158 158 #[derive(Debug)]
159 159 pub struct FileMatcher<'a> {
160 160 files: HashSet<&'a HgPath>,
161 161 dirs: DirsMultiset,
162 162 }
163 163
164 164 impl<'a> FileMatcher<'a> {
165 165 pub fn new(
166 166 files: &'a [impl AsRef<HgPath>],
167 167 ) -> Result<Self, DirstateMapError> {
168 168 Ok(Self {
169 169 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
170 170 dirs: DirsMultiset::from_manifest(files)?,
171 171 })
172 172 }
173 173 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
174 174 self.files.contains(filename.as_ref())
175 175 }
176 176 }
177 177
178 178 impl<'a> Matcher for FileMatcher<'a> {
179 179 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
180 180 Some(&self.files)
181 181 }
182 182 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
183 183 self.inner_matches(filename)
184 184 }
185 185 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
186 186 self.inner_matches(filename)
187 187 }
188 188 fn visit_children_set(
189 189 &self,
190 190 directory: impl AsRef<HgPath>,
191 191 ) -> VisitChildrenSet {
192 192 if self.files.is_empty() || !self.dirs.contains(&directory) {
193 193 return VisitChildrenSet::Empty;
194 194 }
195 195 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
196 196
197 197 let mut candidates: HashSet<&HgPath> =
198 198 self.files.union(&dirs_as_set).map(|k| *k).collect();
199 199 candidates.remove(HgPath::new(b""));
200 200
201 201 if !directory.as_ref().is_empty() {
202 202 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
203 203 candidates = candidates
204 204 .iter()
205 205 .filter_map(|c| {
206 206 if c.as_bytes().starts_with(&directory) {
207 207 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
208 208 } else {
209 209 None
210 210 }
211 211 })
212 212 .collect();
213 213 }
214 214
215 215 // `self.dirs` includes all of the directories, recursively, so if
216 216 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
217 217 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
218 218 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
219 219 // subdir will be in there without a slash.
220 220 VisitChildrenSet::Set(
221 221 candidates
222 222 .iter()
223 223 .filter_map(|c| {
224 224 if c.bytes().all(|b| *b != b'/') {
225 225 Some(*c)
226 226 } else {
227 227 None
228 228 }
229 229 })
230 230 .collect(),
231 231 )
232 232 }
233 233 fn matches_everything(&self) -> bool {
234 234 false
235 235 }
236 236 fn is_exact(&self) -> bool {
237 237 true
238 238 }
239 239 }
240 240
241 241 /// Matches files that are included in the ignore rules.
242 242 #[cfg_attr(
243 243 feature = "with-re2",
244 244 doc = r##"
245 245 ```
246 246 use hg::{
247 247 matchers::{IncludeMatcher, Matcher},
248 248 IgnorePattern,
249 249 PatternSyntax,
250 250 utils::hg_path::HgPath
251 251 };
252 252 use std::path::Path;
253 253 ///
254 254 let ignore_patterns =
255 255 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
256 256 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
257 257 ///
258 258 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
259 259 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
260 260 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
261 261 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
262 262 ```
263 263 "##
264 264 )]
265 265 pub struct IncludeMatcher<'a> {
266 266 patterns: Vec<u8>,
267 267 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
268 268 /// Whether all the patterns match a prefix (i.e. recursively)
269 269 prefix: bool,
270 270 roots: HashSet<HgPathBuf>,
271 271 dirs: HashSet<HgPathBuf>,
272 272 parents: HashSet<HgPathBuf>,
273 273 }
274 274
275 275 impl<'a> Matcher for IncludeMatcher<'a> {
276 276 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
277 277 None
278 278 }
279 279
280 280 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
281 281 false
282 282 }
283 283
284 284 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
285 285 (self.match_fn)(filename.as_ref())
286 286 }
287 287
288 288 fn visit_children_set(
289 289 &self,
290 290 directory: impl AsRef<HgPath>,
291 291 ) -> VisitChildrenSet {
292 292 let dir = directory.as_ref();
293 293 if self.prefix && self.roots.contains(dir) {
294 294 return VisitChildrenSet::Recursive;
295 295 }
296 296 if self.roots.contains(HgPath::new(b""))
297 297 || self.roots.contains(dir)
298 298 || self.dirs.contains(dir)
299 299 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
300 300 {
301 301 return VisitChildrenSet::This;
302 302 }
303 303
304 304 if self.parents.contains(directory.as_ref()) {
305 305 let multiset = self.get_all_parents_children();
306 306 if let Some(children) = multiset.get(dir) {
307 307 return VisitChildrenSet::Set(children.to_owned());
308 308 }
309 309 }
310 310 VisitChildrenSet::Empty
311 311 }
312 312
313 313 fn matches_everything(&self) -> bool {
314 314 false
315 315 }
316 316
317 317 fn is_exact(&self) -> bool {
318 318 false
319 319 }
320 320 }
321 321
#[cfg(feature = "with-re2")]
/// Compiles `pattern` with `Re2` and returns a closure testing whether an
/// `HgPath` matches it.
///
/// Compilation fails with `PatternError::UnsupportedSyntax` when the pattern
/// is invalid or uses something `Re2` does not support, for instance
/// back-references.
#[timed]
fn re_matcher(
    pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
    let compiled =
        Re2::new(pattern).map_err(PatternError::UnsupportedSyntax)?;
    Ok(move |path: &HgPath| compiled.is_match(path.as_bytes()))
}
336 336
337 337 #[cfg(not(feature = "with-re2"))]
338 338 /// Returns a function that matches an `HgPath` against the given regex
339 339 /// pattern.
340 340 ///
341 341 /// This can fail when the pattern is invalid or not supported by the
342 342 /// underlying engine (the `regex` crate), for instance anything with
343 343 /// back-references.
344 344 #[timed]
345 345 fn re_matcher(
346 346 pattern: &[u8],
347 347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
348 348 use std::io::Write;
349 349
350 350 let mut escaped_bytes = vec![];
351 351 for byte in pattern {
352 352 if *byte > 127 {
353 353 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
354 354 } else {
355 355 escaped_bytes.push(*byte);
356 356 }
357 357 }
358 358
359 359 // Avoid the cost of UTF8 checking
360 360 //
361 361 // # Safety
362 362 // This is safe because we escaped all non-ASCII bytes.
363 363 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
364 364 let re = regex::bytes::RegexBuilder::new(&pattern_string)
365 365 .unicode(false)
366 366 .build()
367 367 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
368 368
369 369 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
370 370 }
371 371
372 372 /// Returns the regex pattern and a function that matches an `HgPath` against
373 373 /// said regex formed by the given ignore patterns.
374 374 fn build_regex_match<'a>(
375 375 ignore_patterns: &'a [&'a IgnorePattern],
376 376 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
377 377 let mut regexps = vec![];
378 378 let mut exact_set = HashSet::new();
379 379
380 380 for pattern in ignore_patterns {
381 381 if let Some(re) = build_single_regex(pattern)? {
382 382 regexps.push(re);
383 383 } else {
384 384 let exact = normalize_path_bytes(&pattern.pattern);
385 385 exact_set.insert(HgPathBuf::from_bytes(&exact));
386 386 }
387 387 }
388 388
389 389 let full_regex = regexps.join(&b'|');
390 390
391 391 // An empty pattern would cause the regex engine to incorrectly match the
392 392 // (empty) root directory
393 393 let func = if !(regexps.is_empty()) {
394 394 let matcher = re_matcher(&full_regex)?;
395 395 let func = move |filename: &HgPath| {
396 396 exact_set.contains(filename) || matcher(filename)
397 397 };
398 398 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
399 399 } else {
400 400 let func = move |filename: &HgPath| exact_set.contains(filename);
401 401 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
402 402 };
403 403
404 404 Ok((full_regex, func))
405 405 }
406 406
407 407 /// Returns roots and directories corresponding to each pattern.
408 408 ///
409 409 /// This calculates the roots and directories exactly matching the patterns and
410 410 /// returns a tuple of (roots, dirs). It does not return other directories
411 411 /// which may also need to be considered, like the parent directories.
412 412 fn roots_and_dirs(
413 413 ignore_patterns: &[IgnorePattern],
414 414 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
415 415 let mut roots = Vec::new();
416 416 let mut dirs = Vec::new();
417 417
418 418 for ignore_pattern in ignore_patterns {
419 419 let IgnorePattern {
420 420 syntax, pattern, ..
421 421 } = ignore_pattern;
422 422 match syntax {
423 423 PatternSyntax::RootGlob | PatternSyntax::Glob => {
424 424 let mut root = vec![];
425 425
426 426 for p in pattern.split(|c| *c == b'/') {
427 427 if p.iter().any(|c| match *c {
428 428 b'[' | b'{' | b'*' | b'?' => true,
429 429 _ => false,
430 430 }) {
431 431 break;
432 432 }
433 433 root.push(HgPathBuf::from_bytes(p));
434 434 }
435 435 let buf =
436 436 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
437 437 roots.push(buf);
438 438 }
439 439 PatternSyntax::Path | PatternSyntax::RelPath => {
440 440 let pat = HgPath::new(if pattern == b"." {
441 441 &[] as &[u8]
442 442 } else {
443 443 pattern
444 444 });
445 445 roots.push(pat.to_owned());
446 446 }
447 447 PatternSyntax::RootFiles => {
448 448 let pat = if pattern == b"." {
449 449 &[] as &[u8]
450 450 } else {
451 451 pattern
452 452 };
453 453 dirs.push(HgPathBuf::from_bytes(pat));
454 454 }
455 455 _ => {
456 456 roots.push(HgPathBuf::new());
457 457 }
458 458 }
459 459 }
460 460 (roots, dirs)
461 461 }
462 462
463 463 /// Paths extracted from patterns
464 464 #[derive(Debug, PartialEq)]
465 465 struct RootsDirsAndParents {
466 466 /// Directories to match recursively
467 467 pub roots: HashSet<HgPathBuf>,
468 468 /// Directories to match non-recursively
469 469 pub dirs: HashSet<HgPathBuf>,
470 470 /// Implicitly required directories to go to items in either roots or dirs
471 471 pub parents: HashSet<HgPathBuf>,
472 472 }
473 473
474 474 /// Extract roots, dirs and parents from patterns.
475 475 fn roots_dirs_and_parents(
476 476 ignore_patterns: &[IgnorePattern],
477 477 ) -> PatternResult<RootsDirsAndParents> {
478 478 let (roots, dirs) = roots_and_dirs(ignore_patterns);
479 479
480 480 let mut parents = HashSet::new();
481 481
482 482 parents.extend(
483 483 DirsMultiset::from_manifest(&dirs)
484 484 .map_err(|e| match e {
485 485 DirstateMapError::InvalidPath(e) => e,
486 486 _ => unreachable!(),
487 487 })?
488 488 .iter()
489 489 .map(|k| k.to_owned()),
490 490 );
491 491 parents.extend(
492 492 DirsMultiset::from_manifest(&roots)
493 493 .map_err(|e| match e {
494 494 DirstateMapError::InvalidPath(e) => e,
495 495 _ => unreachable!(),
496 496 })?
497 497 .iter()
498 498 .map(|k| k.to_owned()),
499 499 );
500 500
501 501 Ok(RootsDirsAndParents {
502 502 roots: HashSet::from_iter(roots),
503 503 dirs: HashSet::from_iter(dirs),
504 504 parents,
505 505 })
506 506 }
507 507
508 508 /// Returns a function that checks whether a given file (in the general sense)
509 509 /// should be matched.
510 510 fn build_match<'a, 'b>(
511 511 ignore_patterns: &'a [IgnorePattern],
512 512 root_dir: impl AsRef<Path>,
513 513 ) -> PatternResult<(
514 514 Vec<u8>,
515 515 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
516 516 Vec<PatternFileWarning>,
517 517 )> {
518 518 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
519 519 // For debugging and printing
520 520 let mut patterns = vec![];
521 521 let mut all_warnings = vec![];
522 522
523 523 let (subincludes, ignore_patterns) =
524 524 filter_subincludes(ignore_patterns, root_dir)?;
525 525
526 526 if !subincludes.is_empty() {
527 527 // Build prefix-based matcher functions for subincludes
528 528 let mut submatchers = FastHashMap::default();
529 529 let mut prefixes = vec![];
530 530
531 531 for SubInclude { prefix, root, path } in subincludes.into_iter() {
532 532 let (match_fn, warnings) =
533 533 get_ignore_function(vec![path.to_path_buf()], root)?;
534 534 all_warnings.extend(warnings);
535 535 prefixes.push(prefix.to_owned());
536 536 submatchers.insert(prefix.to_owned(), match_fn);
537 537 }
538 538
539 539 let match_subinclude = move |filename: &HgPath| {
540 540 for prefix in prefixes.iter() {
541 541 if let Some(rel) = filename.relative_to(prefix) {
542 542 if (submatchers.get(prefix).unwrap())(rel) {
543 543 return true;
544 544 }
545 545 }
546 546 }
547 547 false
548 548 };
549 549
550 550 match_funcs.push(Box::new(match_subinclude));
551 551 }
552 552
553 553 if !ignore_patterns.is_empty() {
554 554 // Either do dumb matching if all patterns are rootfiles, or match
555 555 // with a regex.
556 556 if ignore_patterns
557 557 .iter()
558 558 .all(|k| k.syntax == PatternSyntax::RootFiles)
559 559 {
560 560 let dirs: HashSet<_> = ignore_patterns
561 561 .iter()
562 562 .map(|k| k.pattern.to_owned())
563 563 .collect();
564 564 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
565 565
566 566 let match_func = move |path: &HgPath| -> bool {
567 567 let path = path.as_bytes();
568 568 let i = path.iter().rfind(|a| **a == b'/');
569 569 let dir = if let Some(i) = i {
570 570 &path[..*i as usize]
571 571 } else {
572 572 b"."
573 573 };
574 574 dirs.contains(dir.deref())
575 575 };
576 576 match_funcs.push(Box::new(match_func));
577 577
578 578 patterns.extend(b"rootfilesin: ");
579 579 dirs_vec.sort();
580 580 patterns.extend(dirs_vec.escaped_bytes());
581 581 } else {
582 582 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
583 583 patterns = new_re;
584 584 match_funcs.push(match_func)
585 585 }
586 586 }
587 587
588 588 Ok(if match_funcs.len() == 1 {
589 589 (patterns, match_funcs.remove(0), all_warnings)
590 590 } else {
591 591 (
592 592 patterns,
593 593 Box::new(move |f: &HgPath| -> bool {
594 594 match_funcs.iter().any(|match_func| match_func(f))
595 595 }),
596 596 all_warnings,
597 597 )
598 598 })
599 599 }
600 600
601 601 /// Parses all "ignore" files with their recursive includes and returns a
602 602 /// function that checks whether a given file (in the general sense) should be
603 603 /// ignored.
604 604 pub fn get_ignore_function<'a>(
605 605 all_pattern_files: Vec<PathBuf>,
606 606 root_dir: impl AsRef<Path>,
607 607 ) -> PatternResult<(
608 608 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
609 609 Vec<PatternFileWarning>,
610 610 )> {
611 611 let mut all_patterns = vec![];
612 612 let mut all_warnings = vec![];
613 613
614 614 for pattern_file in all_pattern_files.into_iter() {
615 615 let (patterns, warnings) =
616 616 get_patterns_from_file(pattern_file, &root_dir)?;
617 617
618 618 all_patterns.extend(patterns.to_owned());
619 619 all_warnings.extend(warnings);
620 620 }
621 621 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
622 622 all_warnings.extend(warnings);
623 623 Ok((
624 624 Box::new(move |path: &HgPath| matcher.matches(path)),
625 625 all_warnings,
626 626 ))
627 627 }
628 628
629 629 impl<'a> IncludeMatcher<'a> {
630 630 pub fn new(
631 631 ignore_patterns: Vec<IgnorePattern>,
632 632 root_dir: impl AsRef<Path>,
633 633 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
634 634 let (patterns, match_fn, warnings) =
635 635 build_match(&ignore_patterns, root_dir)?;
636 636 let RootsDirsAndParents {
637 637 roots,
638 638 dirs,
639 639 parents,
640 640 } = roots_dirs_and_parents(&ignore_patterns)?;
641 641
642 642 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
643 643 PatternSyntax::Path | PatternSyntax::RelPath => true,
644 644 _ => false,
645 645 });
646 646
647 647 Ok((
648 648 Self {
649 649 patterns,
650 650 match_fn,
651 651 prefix,
652 652 roots,
653 653 dirs,
654 654 parents,
655 655 },
656 656 warnings,
657 657 ))
658 658 }
659 659
660 660 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
661 661 // TODO cache
662 662 let thing = self
663 663 .dirs
664 664 .iter()
665 665 .chain(self.roots.iter())
666 666 .chain(self.parents.iter());
667 667 DirsChildrenMultiset::new(thing, Some(&self.parents))
668 668 }
669 669 }
670 670
671 671 impl<'a> Display for IncludeMatcher<'a> {
672 672 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
673 // XXX What about exact matches?
674 // I'm not sure it's worth it to clone the HashSet and keep it
675 // around just in case someone wants to display the matcher, plus
676 // it's going to be unreadable after a few entries, but we need to
677 // inform in this display that exact matches are being used and are
678 // (on purpose) missing from the `includes`.
673 679 write!(
674 680 f,
675 681 "IncludeMatcher(includes='{}')",
676 682 String::from_utf8_lossy(&self.patterns.escaped_bytes())
677 683 )
678 684 }
679 685 }
680 686
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Builds the set of children expected in a `VisitChildrenSet::Set`.
    fn path_set<'a>(names: &[&'a [u8]]) -> HashSet<&'a HgPath> {
        names.iter().map(|name| HgPath::new(*name)).collect()
    }

    /// The three glob patterns shared by the roots/dirs/parents tests.
    fn glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    #[test]
    fn test_roots_and_dirs() {
        let (roots, dirs) = roots_and_dirs(&glob_patterns());

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, Vec::<HgPathBuf>::new());
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let roots: HashSet<HgPathBuf> =
            vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect();
        let dirs = HashSet::new();
        let parents: HashSet<HgPathBuf> =
            vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect();

        assert_eq!(
            roots_dirs_and_parents(&glob_patterns()).unwrap(),
            RootsDirsAndParents {
                roots,
                dirs,
                parents
            }
        );
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Set(path_set(&[b"foo.txt"]))
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPath::new(b"rootfile.txt"),
            HgPath::new(b"a/file1.txt"),
            HgPath::new(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPath::new(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"a", b"rootfile.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            VisitChildrenSet::Set(path_set(&[b"b", b"file1.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            VisitChildrenSet::Set(path_set(&[b"c", b"file2.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            VisitChildrenSet::Set(path_set(&[b"d"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            VisitChildrenSet::Set(path_set(&[b"file4.txt"]))
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[cfg(feature = "with-re2")]
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let prefix_pattern = IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) =
            IncludeMatcher::new(vec![prefix_pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let rootfiles_pattern = IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) =
            IncludeMatcher::new(vec![rootfiles_pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let glob_pattern = IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir/z*",
            Path::new(""),
        );
        let (matcher, _) =
            IncludeMatcher::new(vec![glob_pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now