##// END OF EJS Templates
rust-matchers: add timing tracing to regex compilation...
Raphaël Gomès -
r45288:83c97c0b default
parent child Browse files
Show More
@@ -1,926 +1,930 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 #[cfg(feature = "with-re2")]
11 11 use crate::re2::Re2;
12 12 use crate::{
13 13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 14 filepatterns::{
15 15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 16 PatternFileWarning, PatternResult, SubInclude,
17 17 },
18 18 utils::{
19 19 files::find_dirs,
20 20 hg_path::{HgPath, HgPathBuf},
21 21 Escaped,
22 22 },
23 23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 24 PatternSyntax,
25 25 };
26 26
27 27 use std::borrow::ToOwned;
28 28 use std::collections::HashSet;
29 29 use std::fmt::{Display, Error, Formatter};
30 30 use std::iter::FromIterator;
31 31 use std::ops::Deref;
32 32 use std::path::{Path, PathBuf};
33 33
34 use micro_timer::timed;
35
34 36 #[derive(Debug, PartialEq)]
35 37 pub enum VisitChildrenSet<'a> {
36 38 /// Don't visit anything
37 39 Empty,
38 40 /// Only visit this directory
39 41 This,
40 42 /// Visit this directory and these subdirectories
41 43 /// TODO Should we implement a `NonEmptyHashSet`?
42 44 Set(HashSet<&'a HgPath>),
43 45 /// Visit this directory and all subdirectories
44 46 Recursive,
45 47 }
46 48
47 49 pub trait Matcher {
48 50 /// Explicitly listed files
49 51 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
50 52 /// Returns whether `filename` is in `file_set`
51 53 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
52 54 /// Returns whether `filename` is matched by this matcher
53 55 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
54 56 /// Decides whether a directory should be visited based on whether it
55 57 /// has potential matches in it or one of its subdirectories, and
56 58 /// potentially lists which subdirectories of that directory should be
57 59 /// visited. This is based on the match's primary, included, and excluded
58 60 /// patterns.
59 61 ///
60 62 /// # Example
61 63 ///
62 64 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
63 65 /// return the following values (assuming the implementation of
64 66 /// visit_children_set is capable of recognizing this; some implementations
65 67 /// are not).
66 68 ///
67 69 /// ```text
68 70 /// ```ignore
69 71 /// '' -> {'foo', 'qux'}
70 72 /// 'baz' -> set()
71 73 /// 'foo' -> {'bar'}
72 74 /// // Ideally this would be `Recursive`, but since the prefix nature of
73 75 /// // matchers is applied to the entire matcher, we have to downgrade this
74 76 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
75 77 /// // `RootFilesIn'-kind matcher being mixed in.
76 78 /// 'foo/bar' -> 'this'
77 79 /// 'qux' -> 'this'
78 80 /// ```
79 81 /// # Important
80 82 ///
81 83 /// Most matchers do not know if they're representing files or
82 84 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
83 85 /// file or a directory, so `visit_children_set('dir')` for most matchers
84 86 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
85 87 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
86 88 /// it may return `VisitChildrenSet::This`.
87 89 /// Do not rely on the return being a `HashSet` indicating that there are
88 90 /// no files in this dir to investigate (or equivalently that if there are
89 91 /// files to investigate in 'dir' that it will always return
90 92 /// `VisitChildrenSet::This`).
91 93 fn visit_children_set(
92 94 &self,
93 95 directory: impl AsRef<HgPath>,
94 96 ) -> VisitChildrenSet;
95 97 /// Matcher will match everything and `files_set()` will be empty:
96 98 /// optimization might be possible.
97 99 fn matches_everything(&self) -> bool;
98 100 /// Matcher will match exactly the files in `files_set()`: optimization
99 101 /// might be possible.
100 102 fn is_exact(&self) -> bool;
101 103 }
102 104
/// A matcher for which every path is a match.
///
/// ```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
116 118
117 119 impl Matcher for AlwaysMatcher {
118 120 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
119 121 None
120 122 }
121 123 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
122 124 false
123 125 }
124 126 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
125 127 true
126 128 }
127 129 fn visit_children_set(
128 130 &self,
129 131 _directory: impl AsRef<HgPath>,
130 132 ) -> VisitChildrenSet {
131 133 VisitChildrenSet::Recursive
132 134 }
133 135 fn matches_everything(&self) -> bool {
134 136 true
135 137 }
136 138 fn is_exact(&self) -> bool {
137 139 false
138 140 }
139 141 }
140 142
141 143 /// Matches the input files exactly. They are interpreted as paths, not
142 144 /// patterns.
143 145 ///
144 146 ///```
145 147 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
146 148 ///
147 149 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
148 150 /// let matcher = FileMatcher::new(&files).unwrap();
149 151 ///
150 152 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
151 153 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
152 154 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
153 155 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
154 156 /// ```
155 157 #[derive(Debug)]
156 158 pub struct FileMatcher<'a> {
157 159 files: HashSet<&'a HgPath>,
158 160 dirs: DirsMultiset,
159 161 }
160 162
161 163 impl<'a> FileMatcher<'a> {
162 164 pub fn new(
163 165 files: &'a [impl AsRef<HgPath>],
164 166 ) -> Result<Self, DirstateMapError> {
165 167 Ok(Self {
166 168 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
167 169 dirs: DirsMultiset::from_manifest(files)?,
168 170 })
169 171 }
170 172 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
171 173 self.files.contains(filename.as_ref())
172 174 }
173 175 }
174 176
175 177 impl<'a> Matcher for FileMatcher<'a> {
176 178 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
177 179 Some(&self.files)
178 180 }
179 181 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
180 182 self.inner_matches(filename)
181 183 }
182 184 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
183 185 self.inner_matches(filename)
184 186 }
185 187 fn visit_children_set(
186 188 &self,
187 189 directory: impl AsRef<HgPath>,
188 190 ) -> VisitChildrenSet {
189 191 if self.files.is_empty() || !self.dirs.contains(&directory) {
190 192 return VisitChildrenSet::Empty;
191 193 }
192 194 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
193 195
194 196 let mut candidates: HashSet<&HgPath> =
195 197 self.files.union(&dirs_as_set).map(|k| *k).collect();
196 198 candidates.remove(HgPath::new(b""));
197 199
198 200 if !directory.as_ref().is_empty() {
199 201 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
200 202 candidates = candidates
201 203 .iter()
202 204 .filter_map(|c| {
203 205 if c.as_bytes().starts_with(&directory) {
204 206 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
205 207 } else {
206 208 None
207 209 }
208 210 })
209 211 .collect();
210 212 }
211 213
212 214 // `self.dirs` includes all of the directories, recursively, so if
213 215 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
214 216 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
215 217 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
216 218 // subdir will be in there without a slash.
217 219 VisitChildrenSet::Set(
218 220 candidates
219 221 .iter()
220 222 .filter_map(|c| {
221 223 if c.bytes().all(|b| *b != b'/') {
222 224 Some(*c)
223 225 } else {
224 226 None
225 227 }
226 228 })
227 229 .collect(),
228 230 )
229 231 }
230 232 fn matches_everything(&self) -> bool {
231 233 false
232 234 }
233 235 fn is_exact(&self) -> bool {
234 236 true
235 237 }
236 238 }
237 239
238 240 /// Matches files that are included in the ignore rules.
239 241 #[cfg_attr(
240 242 feature = "with-re2",
241 243 doc = r##"
242 244 ```
243 245 use hg::{
244 246 matchers::{IncludeMatcher, Matcher},
245 247 IgnorePattern,
246 248 PatternSyntax,
247 249 utils::hg_path::HgPath
248 250 };
249 251 use std::path::Path;
250 252 ///
251 253 let ignore_patterns =
252 254 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
253 255 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
254 256 ///
255 257 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
256 258 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
257 259 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
258 260 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
259 261 ```
260 262 "##
261 263 )]
262 264 pub struct IncludeMatcher<'a> {
263 265 patterns: Vec<u8>,
264 266 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
265 267 /// Whether all the patterns match a prefix (i.e. recursively)
266 268 prefix: bool,
267 269 roots: HashSet<HgPathBuf>,
268 270 dirs: HashSet<HgPathBuf>,
269 271 parents: HashSet<HgPathBuf>,
270 272 }
271 273
272 274 impl<'a> Matcher for IncludeMatcher<'a> {
273 275 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
274 276 None
275 277 }
276 278
277 279 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
278 280 false
279 281 }
280 282
281 283 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
282 284 (self.match_fn)(filename.as_ref())
283 285 }
284 286
285 287 fn visit_children_set(
286 288 &self,
287 289 directory: impl AsRef<HgPath>,
288 290 ) -> VisitChildrenSet {
289 291 let dir = directory.as_ref();
290 292 if self.prefix && self.roots.contains(dir) {
291 293 return VisitChildrenSet::Recursive;
292 294 }
293 295 if self.roots.contains(HgPath::new(b""))
294 296 || self.roots.contains(dir)
295 297 || self.dirs.contains(dir)
296 298 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
297 299 {
298 300 return VisitChildrenSet::This;
299 301 }
300 302
301 303 if self.parents.contains(directory.as_ref()) {
302 304 let multiset = self.get_all_parents_children();
303 305 if let Some(children) = multiset.get(dir) {
304 306 return VisitChildrenSet::Set(children.to_owned());
305 307 }
306 308 }
307 309 VisitChildrenSet::Empty
308 310 }
309 311
310 312 fn matches_everything(&self) -> bool {
311 313 false
312 314 }
313 315
314 316 fn is_exact(&self) -> bool {
315 317 false
316 318 }
317 319 }
318 320
#[cfg(feature = "with-re2")]
/// Compiles `pattern` with `Re2` and returns a function that tests an
/// `HgPath` against the compiled regex.
///
/// Compilation fails with `PatternError::UnsupportedSyntax` when the pattern
/// is invalid or not supported by `Re2`, for instance anything with
/// back-references.
#[timed]
fn re_matcher(
    pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
    let compiled =
        Re2::new(pattern).map_err(PatternError::UnsupportedSyntax)?;
    Ok(move |path: &HgPath| compiled.is_match(path.as_bytes()))
}
332 335
333 336 #[cfg(not(feature = "with-re2"))]
334 337 /// Returns a function that matches an `HgPath` against the given regex
335 338 /// pattern.
336 339 ///
337 340 /// This can fail when the pattern is invalid or not supported by the
338 341 /// underlying engine (the `regex` crate), for instance anything with
339 342 /// back-references.
343 #[timed]
340 344 fn re_matcher(
341 345 pattern: &[u8],
342 346 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
343 347 use std::io::Write;
344 348
345 349 let mut escaped_bytes = vec![];
346 350 for byte in pattern {
347 351 if *byte > 127 {
348 352 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
349 353 } else {
350 354 escaped_bytes.push(*byte);
351 355 }
352 356 }
353 357
354 358 // Avoid the cost of UTF8 checking
355 359 //
356 360 // # Safety
357 361 // This is safe because we escaped all non-ASCII bytes.
358 362 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
359 363 let re = regex::bytes::RegexBuilder::new(&pattern_string)
360 364 .unicode(false)
361 365 .build()
362 366 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
363 367
364 368 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
365 369 }
366 370
367 371 /// Returns the regex pattern and a function that matches an `HgPath` against
368 372 /// said regex formed by the given ignore patterns.
369 373 fn build_regex_match<'a>(
370 374 ignore_patterns: &'a [&'a IgnorePattern],
371 375 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
372 376 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
373 377 .into_iter()
374 378 .map(|k| build_single_regex(*k))
375 379 .collect();
376 380 let regexps = regexps?;
377 381 let full_regex = regexps.join(&b'|');
378 382
379 383 let matcher = re_matcher(&full_regex)?;
380 384 let func = Box::new(move |filename: &HgPath| matcher(filename));
381 385
382 386 Ok((full_regex, func))
383 387 }
384 388
385 389 /// Returns roots and directories corresponding to each pattern.
386 390 ///
387 391 /// This calculates the roots and directories exactly matching the patterns and
388 392 /// returns a tuple of (roots, dirs). It does not return other directories
389 393 /// which may also need to be considered, like the parent directories.
390 394 fn roots_and_dirs(
391 395 ignore_patterns: &[IgnorePattern],
392 396 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
393 397 let mut roots = Vec::new();
394 398 let mut dirs = Vec::new();
395 399
396 400 for ignore_pattern in ignore_patterns {
397 401 let IgnorePattern {
398 402 syntax, pattern, ..
399 403 } = ignore_pattern;
400 404 match syntax {
401 405 PatternSyntax::RootGlob | PatternSyntax::Glob => {
402 406 let mut root = vec![];
403 407
404 408 for p in pattern.split(|c| *c == b'/') {
405 409 if p.iter().any(|c| match *c {
406 410 b'[' | b'{' | b'*' | b'?' => true,
407 411 _ => false,
408 412 }) {
409 413 break;
410 414 }
411 415 root.push(HgPathBuf::from_bytes(p));
412 416 }
413 417 let buf =
414 418 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
415 419 roots.push(buf);
416 420 }
417 421 PatternSyntax::Path | PatternSyntax::RelPath => {
418 422 let pat = HgPath::new(if pattern == b"." {
419 423 &[] as &[u8]
420 424 } else {
421 425 pattern
422 426 });
423 427 roots.push(pat.to_owned());
424 428 }
425 429 PatternSyntax::RootFiles => {
426 430 let pat = if pattern == b"." {
427 431 &[] as &[u8]
428 432 } else {
429 433 pattern
430 434 };
431 435 dirs.push(HgPathBuf::from_bytes(pat));
432 436 }
433 437 _ => {
434 438 roots.push(HgPathBuf::new());
435 439 }
436 440 }
437 441 }
438 442 (roots, dirs)
439 443 }
440 444
441 445 /// Paths extracted from patterns
442 446 #[derive(Debug, PartialEq)]
443 447 struct RootsDirsAndParents {
444 448 /// Directories to match recursively
445 449 pub roots: HashSet<HgPathBuf>,
446 450 /// Directories to match non-recursively
447 451 pub dirs: HashSet<HgPathBuf>,
448 452 /// Implicitly required directories to go to items in either roots or dirs
449 453 pub parents: HashSet<HgPathBuf>,
450 454 }
451 455
452 456 /// Extract roots, dirs and parents from patterns.
453 457 fn roots_dirs_and_parents(
454 458 ignore_patterns: &[IgnorePattern],
455 459 ) -> PatternResult<RootsDirsAndParents> {
456 460 let (roots, dirs) = roots_and_dirs(ignore_patterns);
457 461
458 462 let mut parents = HashSet::new();
459 463
460 464 parents.extend(
461 465 DirsMultiset::from_manifest(&dirs)
462 466 .map_err(|e| match e {
463 467 DirstateMapError::InvalidPath(e) => e,
464 468 _ => unreachable!(),
465 469 })?
466 470 .iter()
467 471 .map(|k| k.to_owned()),
468 472 );
469 473 parents.extend(
470 474 DirsMultiset::from_manifest(&roots)
471 475 .map_err(|e| match e {
472 476 DirstateMapError::InvalidPath(e) => e,
473 477 _ => unreachable!(),
474 478 })?
475 479 .iter()
476 480 .map(|k| k.to_owned()),
477 481 );
478 482
479 483 Ok(RootsDirsAndParents {
480 484 roots: HashSet::from_iter(roots),
481 485 dirs: HashSet::from_iter(dirs),
482 486 parents,
483 487 })
484 488 }
485 489
486 490 /// Returns a function that checks whether a given file (in the general sense)
487 491 /// should be matched.
488 492 fn build_match<'a, 'b>(
489 493 ignore_patterns: &'a [IgnorePattern],
490 494 root_dir: impl AsRef<Path>,
491 495 ) -> PatternResult<(
492 496 Vec<u8>,
493 497 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
494 498 Vec<PatternFileWarning>,
495 499 )> {
496 500 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
497 501 // For debugging and printing
498 502 let mut patterns = vec![];
499 503 let mut all_warnings = vec![];
500 504
501 505 let (subincludes, ignore_patterns) =
502 506 filter_subincludes(ignore_patterns, root_dir)?;
503 507
504 508 if !subincludes.is_empty() {
505 509 // Build prefix-based matcher functions for subincludes
506 510 let mut submatchers = FastHashMap::default();
507 511 let mut prefixes = vec![];
508 512
509 513 for SubInclude { prefix, root, path } in subincludes.into_iter() {
510 514 let (match_fn, warnings) =
511 515 get_ignore_function(vec![path.to_path_buf()], root)?;
512 516 all_warnings.extend(warnings);
513 517 prefixes.push(prefix.to_owned());
514 518 submatchers.insert(prefix.to_owned(), match_fn);
515 519 }
516 520
517 521 let match_subinclude = move |filename: &HgPath| {
518 522 for prefix in prefixes.iter() {
519 523 if let Some(rel) = filename.relative_to(prefix) {
520 524 if (submatchers.get(prefix).unwrap())(rel) {
521 525 return true;
522 526 }
523 527 }
524 528 }
525 529 false
526 530 };
527 531
528 532 match_funcs.push(Box::new(match_subinclude));
529 533 }
530 534
531 535 if !ignore_patterns.is_empty() {
532 536 // Either do dumb matching if all patterns are rootfiles, or match
533 537 // with a regex.
534 538 if ignore_patterns
535 539 .iter()
536 540 .all(|k| k.syntax == PatternSyntax::RootFiles)
537 541 {
538 542 let dirs: HashSet<_> = ignore_patterns
539 543 .iter()
540 544 .map(|k| k.pattern.to_owned())
541 545 .collect();
542 546 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
543 547
544 548 let match_func = move |path: &HgPath| -> bool {
545 549 let path = path.as_bytes();
546 550 let i = path.iter().rfind(|a| **a == b'/');
547 551 let dir = if let Some(i) = i {
548 552 &path[..*i as usize]
549 553 } else {
550 554 b"."
551 555 };
552 556 dirs.contains(dir.deref())
553 557 };
554 558 match_funcs.push(Box::new(match_func));
555 559
556 560 patterns.extend(b"rootfilesin: ");
557 561 dirs_vec.sort();
558 562 patterns.extend(dirs_vec.escaped_bytes());
559 563 } else {
560 564 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
561 565 patterns = new_re;
562 566 match_funcs.push(match_func)
563 567 }
564 568 }
565 569
566 570 Ok(if match_funcs.len() == 1 {
567 571 (patterns, match_funcs.remove(0), all_warnings)
568 572 } else {
569 573 (
570 574 patterns,
571 575 Box::new(move |f: &HgPath| -> bool {
572 576 match_funcs.iter().any(|match_func| match_func(f))
573 577 }),
574 578 all_warnings,
575 579 )
576 580 })
577 581 }
578 582
579 583 /// Parses all "ignore" files with their recursive includes and returns a
580 584 /// function that checks whether a given file (in the general sense) should be
581 585 /// ignored.
582 586 pub fn get_ignore_function<'a>(
583 587 all_pattern_files: Vec<PathBuf>,
584 588 root_dir: impl AsRef<Path>,
585 589 ) -> PatternResult<(
586 590 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
587 591 Vec<PatternFileWarning>,
588 592 )> {
589 593 let mut all_patterns = vec![];
590 594 let mut all_warnings = vec![];
591 595
592 596 for pattern_file in all_pattern_files.into_iter() {
593 597 let (patterns, warnings) =
594 598 get_patterns_from_file(pattern_file, &root_dir)?;
595 599
596 600 all_patterns.extend(patterns.to_owned());
597 601 all_warnings.extend(warnings);
598 602 }
599 603 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
600 604 all_warnings.extend(warnings);
601 605 Ok((
602 606 Box::new(move |path: &HgPath| matcher.matches(path)),
603 607 all_warnings,
604 608 ))
605 609 }
606 610
607 611 impl<'a> IncludeMatcher<'a> {
608 612 pub fn new(
609 613 ignore_patterns: Vec<IgnorePattern>,
610 614 root_dir: impl AsRef<Path>,
611 615 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
612 616 let (patterns, match_fn, warnings) =
613 617 build_match(&ignore_patterns, root_dir)?;
614 618 let RootsDirsAndParents {
615 619 roots,
616 620 dirs,
617 621 parents,
618 622 } = roots_dirs_and_parents(&ignore_patterns)?;
619 623
620 624 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
621 625 PatternSyntax::Path | PatternSyntax::RelPath => true,
622 626 _ => false,
623 627 });
624 628
625 629 Ok((
626 630 Self {
627 631 patterns,
628 632 match_fn,
629 633 prefix,
630 634 roots,
631 635 dirs,
632 636 parents,
633 637 },
634 638 warnings,
635 639 ))
636 640 }
637 641
638 642 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
639 643 // TODO cache
640 644 let thing = self
641 645 .dirs
642 646 .iter()
643 647 .chain(self.roots.iter())
644 648 .chain(self.parents.iter());
645 649 DirsChildrenMultiset::new(thing, Some(&self.parents))
646 650 }
647 651 }
648 652
649 653 impl<'a> Display for IncludeMatcher<'a> {
650 654 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
651 655 write!(
652 656 f,
653 657 "IncludeMatcher(includes='{}')",
654 658 String::from_utf8_lossy(&self.patterns.escaped_bytes())
655 659 )
656 660 }
657 661 }
658 662
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Builds the `VisitChildrenSet::Set` made of exactly the given names.
    fn set_of(names: &[&'static str]) -> VisitChildrenSet<'static> {
        VisitChildrenSet::Set(
            names
                .iter()
                .copied()
                .map(|name| HgPath::new(name.as_bytes()))
                .collect(),
        )
    }

    /// The three glob patterns shared by the roots/dirs/parents tests.
    fn glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    #[test]
    fn test_roots_and_dirs() {
        let (roots, dirs) = roots_and_dirs(&glob_patterns());

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, Vec::<HgPathBuf>::new());
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let roots: HashSet<HgPathBuf> =
            vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect();
        let dirs = HashSet::new();
        let parents: HashSet<HgPathBuf> =
            vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect();

        assert_eq!(
            roots_dirs_and_parents(&glob_patterns()).unwrap(),
            RootsDirsAndParents {
                roots,
                dirs,
                parents
            }
        );
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            set_of(&["dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            set_of(&["subdir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            set_of(&["foo.txt"])
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPath::new(b"rootfile.txt"),
            HgPath::new(b"a/file1.txt"),
            HgPath::new(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPath::new(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            set_of(&["a", "rootfile.txt"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            set_of(&["b", "file1.txt"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            set_of(&["c", "file2.txt"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            set_of(&["d"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            set_of(&["file4.txt"])
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[cfg(feature = "with-re2")]
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let (matcher, _) = IncludeMatcher::new(
            vec![IgnorePattern::new(
                PatternSyntax::RelPath,
                b"dir/subdir",
                Path::new(""),
            )],
            "",
        )
        .unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            set_of(&["dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            set_of(&["subdir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let (matcher, _) = IncludeMatcher::new(
            vec![IgnorePattern::new(
                PatternSyntax::RootFiles,
                b"dir/subdir",
                Path::new(""),
            )],
            "",
        )
        .unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            set_of(&["dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            set_of(&["subdir"])
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let (matcher, _) = IncludeMatcher::new(
            vec![IgnorePattern::new(
                PatternSyntax::Glob,
                b"dir/z*",
                Path::new(""),
            )],
            "",
        )
        .unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            set_of(&["dir"])
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now