##// END OF EJS Templates
hgignore: add a GlobSuffix type, instead of passing byte arrays...
Arseniy Alekseyev -
r53249:2ff004fb default
parent child Browse files
Show More
@@ -1,913 +1,940
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::path::{Path, PathBuf};
21 21 use std::vec::Vec;
22 22 use std::{fmt, ops::Deref};
23 23
/// Errors that can occur while parsing patterns or turning them into
/// regular expressions.
#[derive(Debug, derive_more::From)]
pub enum PatternError {
    /// A path-level error, convertible from [`HgPathError`].
    #[from]
    Path(HgPathError),
    /// The given syntax name is not one of the supported pattern syntaxes.
    UnsupportedSyntax(String),
    /// `(syntax, file path, line)`: an unsupported syntax, with the location
    /// it was found at. Displayed as `file:line: unsupported syntax ...`.
    UnsupportedSyntaxInFile(String, String, usize),
    /// The matcher pattern is too long; holds its size in bytes.
    TooLong(usize),
    /// An I/O error, convertible from [`std::io::Error`].
    #[from]
    IO(std::io::Error),
    /// Needed a pattern that can be turned into a regex but got one that
    /// can't. This should only happen through programmer error.
    NonRegexPattern(IgnorePattern),
}
37 37
38 38 impl fmt::Display for PatternError {
39 39 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
40 40 match self {
41 41 PatternError::UnsupportedSyntax(syntax) => {
42 42 write!(f, "Unsupported syntax {}", syntax)
43 43 }
44 44 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
45 45 write!(
46 46 f,
47 47 "{}:{}: unsupported syntax {}",
48 48 file_path, line, syntax
49 49 )
50 50 }
51 51 PatternError::TooLong(size) => {
52 52 write!(f, "matcher pattern is too long ({} bytes)", size)
53 53 }
54 54 PatternError::IO(error) => error.fmt(f),
55 55 PatternError::Path(error) => error.fmt(f),
56 56 PatternError::NonRegexPattern(pattern) => {
57 57 write!(f, "'{:?}' cannot be turned into a regex", pattern)
58 58 }
59 59 }
60 60 }
61 61 }
62 62
63 63 lazy_static! {
64 64 static ref RE_ESCAPE: Vec<Vec<u8>> = {
65 65 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
66 66 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
67 67 for byte in to_escape {
68 68 v[*byte as usize].insert(0, b'\\');
69 69 }
70 70 v
71 71 };
72 72 }
73 73
/// `(what follows a '*' in the glob, regex to emit)` pairs used by
/// `glob_to_re` once it has consumed a `*`.
///
/// These are matched in order: `**/` first, then `**`, and finally the empty
/// prefix, which always matches and handles a lone `*`.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
77 77
/// The syntax a pattern is written in, which determines how its bytes are
/// interpreted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// A path relative to repository root, which is matched recursively
    Path,
    /// A single exact path relative to repository root
    FilePath,
    /// A path relative to cwd
    RelPath,
    /// An unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFilesIn,
    /// A file of patterns to read and include
    Include,
    /// A file of patterns to match against files under the same directory
    SubInclude,
    /// SubInclude with the result of parsing the included file
    ///
    /// Note: there is no ExpandedInclude because that expansion can be done
    /// in place by replacing the Include pattern by the included patterns.
    /// SubInclude requires more handling.
    ///
    /// Note: `Box` is used to minimize size impact on other enum variants
    ExpandedSubInclude(Box<SubInclude>),
}
113 113
114 114 /// Transforms a glob pattern into a regex
115 115 pub fn glob_to_re(pat: &[u8]) -> Vec<u8> {
116 116 let mut input = pat;
117 117 let mut res: Vec<u8> = vec![];
118 118 let mut group_depth = 0;
119 119
120 120 while let Some((c, rest)) = input.split_first() {
121 121 input = rest;
122 122
123 123 match c {
124 124 b'*' => {
125 125 for (source, repl) in GLOB_REPLACEMENTS {
126 126 if let Some(rest) = input.drop_prefix(source) {
127 127 input = rest;
128 128 res.extend(*repl);
129 129 break;
130 130 }
131 131 }
132 132 }
133 133 b'?' => res.extend(b"."),
134 134 b'[' => {
135 135 match input.iter().skip(1).position(|b| *b == b']') {
136 136 None => res.extend(b"\\["),
137 137 Some(end) => {
138 138 // Account for the one we skipped
139 139 let end = end + 1;
140 140
141 141 res.extend(b"[");
142 142
143 143 for (i, b) in input[..end].iter().enumerate() {
144 144 if *b == b'!' && i == 0 {
145 145 res.extend(b"^")
146 146 } else if *b == b'^' && i == 0 {
147 147 res.extend(b"\\^")
148 148 } else if *b == b'\\' {
149 149 res.extend(b"\\\\")
150 150 } else {
151 151 res.push(*b)
152 152 }
153 153 }
154 154 res.extend(b"]");
155 155 input = &input[end + 1..];
156 156 }
157 157 }
158 158 }
159 159 b'{' => {
160 160 group_depth += 1;
161 161 res.extend(b"(?:")
162 162 }
163 163 b'}' if group_depth > 0 => {
164 164 group_depth -= 1;
165 165 res.extend(b")");
166 166 }
167 167 b',' if group_depth > 0 => res.extend(b"|"),
168 168 b'\\' => {
169 169 let c = {
170 170 if let Some((c, rest)) = input.split_first() {
171 171 input = rest;
172 172 c
173 173 } else {
174 174 c
175 175 }
176 176 };
177 177 res.extend(&RE_ESCAPE[*c as usize])
178 178 }
179 179 _ => res.extend(&RE_ESCAPE[*c as usize]),
180 180 }
181 181 }
182 182 res
183 183 }
184 184
185 185 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
186 186 pattern
187 187 .iter()
188 188 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
189 189 .collect()
190 190 }
191 191
192 192 pub fn parse_pattern_syntax_kind(
193 193 kind: &[u8],
194 194 ) -> Result<PatternSyntax, PatternError> {
195 195 match kind {
196 196 b"re" => Ok(PatternSyntax::Regexp),
197 197 b"path" => Ok(PatternSyntax::Path),
198 198 b"filepath" => Ok(PatternSyntax::FilePath),
199 199 b"relpath" => Ok(PatternSyntax::RelPath),
200 200 b"rootfilesin" => Ok(PatternSyntax::RootFilesIn),
201 201 b"relglob" => Ok(PatternSyntax::RelGlob),
202 202 b"relre" => Ok(PatternSyntax::RelRegexp),
203 203 b"glob" => Ok(PatternSyntax::Glob),
204 204 b"rootglob" => Ok(PatternSyntax::RootGlob),
205 205 b"include" => Ok(PatternSyntax::Include),
206 206 b"subinclude" => Ok(PatternSyntax::SubInclude),
207 207 _ => Err(PatternError::UnsupportedSyntax(
208 208 String::from_utf8_lossy(kind).to_string(),
209 209 )),
210 210 }
211 211 }
212 212
lazy_static! {
    /// Matches a group of inline regex flags (such as `(?i)` or `(?ia)`) at
    /// the very start of a pattern.
    static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
}
216 216
/// Extra path components to match at the end of the pattern
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GlobSuffix {
    /// `Empty` means the pattern only matches files, not directories,
    /// so the path needs to match exactly.
    Empty,
    /// `MoreComponents` means the pattern matches directories as well,
    /// so any path that has the pattern as a prefix, should match.
    MoreComponents,
}

impl GlobSuffix {
    /// Returns the regex fragment to append to a glob's regex so that it
    /// matches according to this suffix kind.
    fn to_re(self) -> &'static [u8] {
        match self {
            // Anchor at the end of the path
            Self::Empty => b"$",
            // Either the end of the path or the start of a sub-path
            Self::MoreComponents => b"(?:/|$)",
        }
    }
}
236
217 237 /// Builds the regex that corresponds to the given pattern.
218 238 /// If within a `syntax: regexp` context, returns the pattern,
219 239 /// otherwise, returns the corresponding regex.
220 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
240 fn _build_single_regex(
241 entry: &IgnorePattern,
242 glob_suffix: GlobSuffix,
243 ) -> Vec<u8> {
221 244 let IgnorePattern {
222 245 syntax, pattern, ..
223 246 } = entry;
224 247 if pattern.is_empty() {
225 248 return vec![];
226 249 }
227 250 match syntax {
228 251 PatternSyntax::Regexp => pattern.to_owned(),
229 252 PatternSyntax::RelRegexp => {
230 253 // The `regex` crate accepts `**` while `re2` and Python's `re`
231 254 // do not. Checking for `*` correctly triggers the same error all
232 255 // engines.
233 256 if pattern[0] == b'^'
234 257 || pattern[0] == b'*'
235 258 || pattern.starts_with(b".*")
236 259 {
237 260 return pattern.to_owned();
238 261 }
239 262 match FLAG_RE.find(pattern) {
240 263 Some(mat) => {
241 264 let s = mat.start();
242 265 let e = mat.end();
243 266 [
244 267 &b"(?"[..],
245 268 &pattern[s + 2..e - 1],
246 269 &b":"[..],
247 270 if pattern[e] == b'^'
248 271 || pattern[e] == b'*'
249 272 || pattern[e..].starts_with(b".*")
250 273 {
251 274 &b""[..]
252 275 } else {
253 276 &b".*"[..]
254 277 },
255 278 &pattern[e..],
256 279 &b")"[..],
257 280 ]
258 281 .concat()
259 282 }
260 283 None => [&b".*"[..], pattern].concat(),
261 284 }
262 285 }
263 286 PatternSyntax::Path | PatternSyntax::RelPath => {
264 287 if pattern == b"." {
265 288 return vec![];
266 289 }
267 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
290 [
291 escape_pattern(pattern).as_slice(),
292 GlobSuffix::MoreComponents.to_re(),
293 ]
294 .concat()
268 295 }
269 296 PatternSyntax::RootFilesIn => {
270 297 let mut res = if pattern == b"." {
271 298 vec![]
272 299 } else {
273 300 // Pattern is a directory name.
274 301 [escape_pattern(pattern).as_slice(), b"/"].concat()
275 302 };
276 303
277 304 // Anything after the pattern must be a non-directory.
278 305 res.extend(b"[^/]+$");
279 306 res
280 307 }
281 308 PatternSyntax::RelGlob => {
282 309 let glob_re = glob_to_re(pattern);
283 310 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
284 [b".*", rest, glob_suffix].concat()
311 [b".*", rest, glob_suffix.to_re()].concat()
285 312 } else {
286 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
313 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix.to_re()].concat()
287 314 }
288 315 }
289 316 PatternSyntax::Glob | PatternSyntax::RootGlob => {
290 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
317 [glob_to_re(pattern).as_slice(), glob_suffix.to_re()].concat()
291 318 }
292 319 PatternSyntax::Include
293 320 | PatternSyntax::SubInclude
294 321 | PatternSyntax::ExpandedSubInclude(_)
295 322 | PatternSyntax::FilePath => unreachable!(),
296 323 }
297 324 }
298 325
/// Bytes that have a special meaning in a glob. `build_single_regex` treats
/// a `rootglob` containing none of them as an exact path that needs no regex.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
301 328
/// Normalizes a `/`-separated path given as bytes: removes empty and `.`
/// components, resolves `..` against the preceding component where possible
/// and collapses repeated slashes. An empty result is returned as `.`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';

    if bytes.is_empty() {
        return b".".to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let initial_slashes = match bytes.iter().take_while(|b| **b == SEP).count()
    {
        count if count > 2 => 1,
        count => count,
    };

    let mut kept: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == SEP) {
        if component.is_empty() || component == b"." {
            continue;
        }
        // A `..` is kept only when there is nothing it can cancel out: at
        // the start of a relative path, or right after another kept `..`.
        let keep = component != b".."
            || (initial_slashes == 0 && kept.is_empty())
            || kept.last().map_or(false, |previous| *previous == b"..");
        if keep {
            kept.push(component);
        } else {
            // No-op when there is nothing left to pop (e.g. `/..`).
            kept.pop();
        }
    }

    let mut normalized = vec![SEP; initial_slashes];
    normalized.extend(kept.join(&SEP));

    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}
343 370
344 371 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
345 372 /// that don't need to be transformed into a regex.
346 373 pub fn build_single_regex(
347 374 entry: &IgnorePattern,
348 glob_suffix: &[u8],
375 glob_suffix: GlobSuffix,
349 376 ) -> Result<Option<Vec<u8>>, PatternError> {
350 377 let IgnorePattern {
351 378 pattern, syntax, ..
352 379 } = entry;
353 380 let pattern = match syntax {
354 381 PatternSyntax::RootGlob
355 382 | PatternSyntax::Path
356 383 | PatternSyntax::RelGlob
357 384 | PatternSyntax::RelPath
358 385 | PatternSyntax::RootFilesIn => normalize_path_bytes(pattern),
359 386 PatternSyntax::Include | PatternSyntax::SubInclude => {
360 387 return Err(PatternError::NonRegexPattern(entry.clone()))
361 388 }
362 389 _ => pattern.to_owned(),
363 390 };
364 391 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
365 392 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
366 393 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
367 394 Ok(None)
368 395 } else {
369 396 let mut entry = entry.clone();
370 397 entry.pattern = pattern;
371 398 Ok(Some(_build_single_regex(&entry, glob_suffix)))
372 399 }
373 400 }
374 401
375 402 lazy_static! {
376 403 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
377 404 let mut m = FastHashMap::default();
378 405
379 406 m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
380 407 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
381 408 m.insert(b"path:".as_ref(), PatternSyntax::Path);
382 409 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
383 410 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
384 411 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFilesIn);
385 412 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
386 413 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
387 414 m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
388 415 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
389 416 m.insert(b"include:".as_ref(), PatternSyntax::Include);
390 417 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);
391 418
392 419 m
393 420 };
394 421 }
395 422
/// Non-fatal problems found while reading a pattern file. These are
/// returned alongside the parsed patterns rather than as errors.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes): a `syntax:` line named an unknown syntax
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path: the pattern file does not exist
    NoSuchFile(PathBuf),
}
403 430
404 431 pub fn parse_one_pattern(
405 432 pattern: &[u8],
406 433 source: &Path,
407 434 default: PatternSyntax,
408 435 normalize: bool,
409 436 ) -> IgnorePattern {
410 437 let mut pattern_bytes: &[u8] = pattern;
411 438 let mut syntax = default;
412 439
413 440 for (s, val) in SYNTAXES.iter() {
414 441 if let Some(rest) = pattern_bytes.drop_prefix(s) {
415 442 syntax = val.clone();
416 443 pattern_bytes = rest;
417 444 break;
418 445 }
419 446 }
420 447
421 448 let pattern = match syntax {
422 449 PatternSyntax::RootGlob
423 450 | PatternSyntax::Path
424 451 | PatternSyntax::Glob
425 452 | PatternSyntax::RelGlob
426 453 | PatternSyntax::RelPath
427 454 | PatternSyntax::RootFilesIn
428 455 if normalize =>
429 456 {
430 457 normalize_path_bytes(pattern_bytes)
431 458 }
432 459 _ => pattern_bytes.to_vec(),
433 460 };
434 461
435 462 IgnorePattern {
436 463 syntax,
437 464 pattern,
438 465 source: source.to_owned(),
439 466 }
440 467 }
441 468
442 469 pub fn parse_pattern_file_contents(
443 470 lines: &[u8],
444 471 file_path: &Path,
445 472 default_syntax_override: Option<PatternSyntax>,
446 473 warn: bool,
447 474 relativize: bool,
448 475 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
449 476 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
450 477
451 478 #[allow(clippy::trivial_regex)]
452 479 let comment_escape_regex = Regex::new(r"\\#").unwrap();
453 480 let mut inputs: Vec<IgnorePattern> = vec![];
454 481 let mut warnings: Vec<PatternFileWarning> = vec![];
455 482
456 483 let mut current_syntax =
457 484 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
458 485
459 486 for mut line in lines.split(|c| *c == b'\n') {
460 487 let line_buf;
461 488 if line.contains(&b'#') {
462 489 if let Some(cap) = comment_regex.captures(line) {
463 490 line = &line[..cap.get(1).unwrap().end()]
464 491 }
465 492 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
466 493 line = &line_buf;
467 494 }
468 495
469 496 let line = line.trim_end();
470 497
471 498 if line.is_empty() {
472 499 continue;
473 500 }
474 501
475 502 if let Some(syntax) = line.drop_prefix(b"syntax:") {
476 503 let syntax = syntax.trim();
477 504
478 505 if let Some(parsed) =
479 506 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
480 507 {
481 508 current_syntax = parsed.clone();
482 509 } else if warn {
483 510 warnings.push(PatternFileWarning::InvalidSyntax(
484 511 file_path.to_owned(),
485 512 syntax.to_owned(),
486 513 ));
487 514 }
488 515 } else {
489 516 let pattern = parse_one_pattern(
490 517 line,
491 518 file_path,
492 519 current_syntax.clone(),
493 520 false,
494 521 );
495 522 inputs.push(if relativize {
496 523 pattern.to_relative()
497 524 } else {
498 525 pattern
499 526 })
500 527 }
501 528 }
502 529 Ok((inputs, warnings))
503 530 }
504 531
505 532 pub fn parse_pattern_args(
506 533 patterns: Vec<Vec<u8>>,
507 534 cwd: &Path,
508 535 root: &Path,
509 536 ) -> Result<Vec<IgnorePattern>, HgPathError> {
510 537 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
511 538 for pattern in patterns {
512 539 let pattern = parse_one_pattern(
513 540 &pattern,
514 541 Path::new("<args>"),
515 542 PatternSyntax::RelPath,
516 543 true,
517 544 );
518 545 match pattern.syntax {
519 546 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
520 547 let name = get_path_from_bytes(&pattern.pattern);
521 548 let canon = canonical_path(root, cwd, name)?;
522 549 ignore_patterns.push(IgnorePattern {
523 550 syntax: pattern.syntax,
524 551 pattern: get_bytes_from_path(canon),
525 552 source: pattern.source,
526 553 })
527 554 }
528 555 _ => ignore_patterns.push(pattern.to_owned()),
529 556 };
530 557 }
531 558 Ok(ignore_patterns)
532 559 }
533 560
534 561 pub fn read_pattern_file(
535 562 file_path: &Path,
536 563 warn: bool,
537 564 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
538 565 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
539 566 match std::fs::read(file_path) {
540 567 Ok(contents) => {
541 568 inspect_pattern_bytes(file_path, &contents);
542 569 parse_pattern_file_contents(&contents, file_path, None, warn, true)
543 570 }
544 571 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
545 572 vec![],
546 573 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
547 574 )),
548 575 Err(e) => Err(e.into()),
549 576 }
550 577 }
551 578
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// How `pattern` is to be interpreted
    pub syntax: PatternSyntax,
    /// The pattern itself, without any `kind:` prefix
    pub pattern: Vec<u8>,
    /// Where the pattern comes from: the path of the pattern file, or
    /// `<args>` for patterns parsed by `parse_pattern_args`
    pub source: PathBuf,
}
559 586
560 587 impl IgnorePattern {
561 588 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
562 589 Self {
563 590 syntax,
564 591 pattern: pattern.to_owned(),
565 592 source: source.to_owned(),
566 593 }
567 594 }
568 595
569 596 pub fn to_relative(self) -> Self {
570 597 let Self {
571 598 syntax,
572 599 pattern,
573 600 source,
574 601 } = self;
575 602 Self {
576 603 syntax: match syntax {
577 604 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
578 605 PatternSyntax::Glob => PatternSyntax::RelGlob,
579 606 x => x,
580 607 },
581 608 pattern,
582 609 source,
583 610 }
584 611 }
585 612 }
586 613
587 614 pub type PatternResult<T> = Result<T, PatternError>;
588 615
/// Wrapper for `read_pattern_file` that also recursively expands `include:`
/// and `subinclude:` patterns.
///
/// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
/// is used for the latter to form a tree of patterns.
///
/// * `pattern_file`: the file to read
/// * `root_dir`: directory that `include:` paths are joined to, and that is
///   passed to `SubInclude::new` for `subinclude:` patterns
/// * `inspect_pattern_bytes`: called with the path and raw contents of every
///   file that gets read
///
/// Returns the expanded patterns and the warnings from all the files read.
pub fn get_patterns_from_file(
    pattern_file: &Path,
    root_dir: &Path,
    inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
    let (patterns, mut warnings) =
        read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
    let patterns = patterns
        .into_iter()
        // NOTE(review): the closure returns a `Result`, which `flat_map`
        // iterates over: an `Err` yields no item, so an error raised by `?`
        // below drops that entry instead of being returned to the caller.
        // Confirm whether this best-effort behavior is intended.
        .flat_map(|entry| -> PatternResult<_> {
            Ok(match &entry.syntax {
                PatternSyntax::Include => {
                    // Replace the `include:` entry by the patterns of the
                    // file it points to.
                    let inner_include =
                        root_dir.join(get_path_from_bytes(&entry.pattern));
                    let (inner_pats, inner_warnings) = get_patterns_from_file(
                        &inner_include,
                        root_dir,
                        inspect_pattern_bytes,
                    )?;
                    warnings.extend(inner_warnings);
                    inner_pats
                }
                PatternSyntax::SubInclude => {
                    let mut sub_include = SubInclude::new(
                        root_dir,
                        &entry.pattern,
                        &entry.source,
                    )?;
                    // The included file is read with the subinclude's own
                    // root.
                    let (inner_patterns, inner_warnings) =
                        get_patterns_from_file(
                            &sub_include.path,
                            &sub_include.root,
                            inspect_pattern_bytes,
                        )?;
                    sub_include.included_patterns = inner_patterns;
                    warnings.extend(inner_warnings);
                    // Keep a single entry that carries the parsed patterns.
                    vec![IgnorePattern {
                        syntax: PatternSyntax::ExpandedSubInclude(Box::new(
                            sub_include,
                        )),
                        ..entry
                    }]
                }
                _ => vec![entry],
            })
        })
        // Each surviving item is a `Vec` of patterns: flatten them.
        .flatten()
        .collect();

    Ok((patterns, warnings))
}
645 672
/// Holds all the information needed to handle a `subinclude:` pattern.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SubInclude {
    /// Will be used for repository (hg) paths that start with this prefix.
    /// It is relative to the current working directory, so comparing against
    /// repository paths is painless.
    pub prefix: HgPathBuf,
    /// The file itself, containing the patterns
    pub path: PathBuf,
    /// Folder in the filesystem where it applies
    pub root: PathBuf,

    /// The patterns read from `path`. Empty until filled in by
    /// `get_patterns_from_file`.
    pub included_patterns: Vec<IgnorePattern>,
}
660 687
661 688 impl SubInclude {
662 689 pub fn new(
663 690 root_dir: &Path,
664 691 pattern: &[u8],
665 692 source: &Path,
666 693 ) -> Result<SubInclude, HgPathError> {
667 694 let normalized_source =
668 695 normalize_path_bytes(&get_bytes_from_path(source));
669 696
670 697 let source_root = get_path_from_bytes(&normalized_source);
671 698 let source_root = source_root.parent().unwrap_or(source_root);
672 699
673 700 let path = source_root.join(get_path_from_bytes(pattern));
674 701 let new_root = path.parent().unwrap_or_else(|| path.deref());
675 702
676 703 let prefix = canonical_path(root_dir, root_dir, new_root)?;
677 704
678 705 Ok(Self {
679 706 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
680 707 if !p.is_empty() {
681 708 p.push_byte(b'/');
682 709 }
683 710 p
684 711 })?,
685 712 path: path.to_owned(),
686 713 root: new_root.to_owned(),
687 714 included_patterns: Vec::new(),
688 715 })
689 716 }
690 717 }
691 718
692 719 /// Separate and pre-process subincludes from other patterns for the "ignore"
693 720 /// phase.
694 721 pub fn filter_subincludes(
695 722 ignore_patterns: Vec<IgnorePattern>,
696 723 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
697 724 let mut subincludes = vec![];
698 725 let mut others = vec![];
699 726
700 727 for pattern in ignore_patterns {
701 728 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
702 729 {
703 730 subincludes.push(*sub_include);
704 731 } else {
705 732 others.push(pattern)
706 733 }
707 734 }
708 735 Ok((subincludes, others))
709 736 }
710 737
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `lines` as the contents of `file_path`, with the default
    /// syntax, no warnings and relativized patterns.
    fn parse_lines(lines: &[u8]) -> Vec<IgnorePattern> {
        parse_pattern_file_contents(
            lines,
            Path::new("file_path"),
            None,
            false,
            true,
        )
        .unwrap()
        .0
    }

    /// Builds the regex for a pattern with an empty source path.
    fn regex_for(
        syntax: PatternSyntax,
        pattern: &[u8],
        glob_suffix: GlobSuffix,
    ) -> Option<Vec<u8>> {
        build_single_regex(
            &IgnorePattern::new(syntax, pattern, Path::new("")),
            glob_suffix,
        )
        .unwrap()
    }

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
            br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
        );
    }

    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br"?"), br".");
        assert_eq!(glob_to_re(br"*"), br"[^/]*");
        assert_eq!(glob_to_re(br"**"), br".*");
        assert_eq!(glob_to_re(br"**/a"), br"(?:.*/)?a");
        assert_eq!(glob_to_re(br"a/**/b"), br"a/(?:.*/)?b");
        assert_eq!(glob_to_re(br"[a*?!^][^b][!c]"), br"[a*?!^][\^b][^c]");
        assert_eq!(glob_to_re(br"{a,b}"), br"(?:a|b)");
        assert_eq!(glob_to_re(br".\*\?"), br"\.\*\?");
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        assert_eq!(
            parse_lines(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        assert_eq!(parse_lines(b"syntax: include\nsyntax: glob"), vec![]);

        assert_eq!(
            parse_lines(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            regex_for(
                PatternSyntax::RelGlob,
                b"rust/target/",
                GlobSuffix::MoreComponents
            ),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            regex_for(
                PatternSyntax::Regexp,
                br"rust/target/\d+",
                GlobSuffix::MoreComponents
            ),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"", GlobSuffix::MoreComponents),
            None,
        );
        assert_eq!(
            regex_for(
                PatternSyntax::RootGlob,
                b"whatever",
                GlobSuffix::MoreComponents
            ),
            None,
        );
        assert_eq!(
            regex_for(
                PatternSyntax::RootGlob,
                b"*.o",
                GlobSuffix::MoreComponents
            ),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    /// `GlobSuffix::Empty` anchors globs at the end of the path, while
    /// `path:` patterns always match recursively.
    #[test]
    fn test_build_single_regex_empty_suffix() {
        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"*.o", GlobSuffix::Empty),
            Some(br"[^/]*\.o$".to_vec()),
        );
        assert_eq!(
            regex_for(
                PatternSyntax::RelGlob,
                b"rust/target/",
                GlobSuffix::Empty
            ),
            Some(br"(?:.*/)?rust/target$".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::Path, b"a.b", GlobSuffix::Empty),
            Some(br"a\.b(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_relregex() {
        assert_eq!(
            regex_for(
                PatternSyntax::RelRegexp,
                b"^ba{2}r",
                GlobSuffix::MoreComponents
            ),
            Some(b"^ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(
                PatternSyntax::RelRegexp,
                b"ba{2}r",
                GlobSuffix::MoreComponents
            ),
            Some(b".*ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(
                PatternSyntax::RelRegexp,
                b"(?ia)ba{2}r",
                GlobSuffix::MoreComponents
            ),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
        assert_eq!(
            regex_for(
                PatternSyntax::RelRegexp,
                b"(?ia)^ba{2}r",
                GlobSuffix::MoreComponents
            ),
            Some(b"(?ia:^ba{2}r)".to_vec()),
        );
    }
}
@@ -1,2453 +1,2455
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use format_bytes::format_bytes;
11 11 use once_cell::sync::OnceCell;
12 12
13 13 use crate::{
14 14 dirstate::dirs_multiset::{DirsChildrenMultiset, DirsMultiset},
15 15 filepatterns::{
16 16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 IgnorePattern, PatternError, PatternFileWarning, PatternResult,
18 PatternSyntax,
17 GlobSuffix, IgnorePattern, PatternError, PatternFileWarning,
18 PatternResult, PatternSyntax,
19 19 },
20 20 utils::{
21 21 files::{dir_ancestors, find_dirs},
22 22 hg_path::{HgPath, HgPathBuf, HgPathError},
23 23 Escaped,
24 24 },
25 25 FastHashMap,
26 26 };
27 27
28 28 use crate::dirstate::status::IgnoreFnType;
29 29 use crate::filepatterns::normalize_path_bytes;
30 30 use std::collections::HashSet;
31 31 use std::fmt::{Display, Error, Formatter};
32 32 use std::path::{Path, PathBuf};
33 33 use std::{borrow::ToOwned, collections::BTreeSet};
34 34
35 35 #[derive(Debug, PartialEq)]
36 36 pub enum VisitChildrenSet {
37 37 /// Don't visit anything
38 38 Empty,
39 39 /// Visit this directory and probably its children
40 40 This,
41 41 /// Only visit the children (both files and directories) if they
42 42 /// are mentioned in this set. (empty set corresponds to [`Self::Empty`])
43 43 /// TODO Should we implement a `NonEmptyHashSet`?
44 44 Set(HashSet<HgPathBuf>),
45 45 /// Visit this directory and all subdirectories
46 46 /// (you can stop asking about the children set)
47 47 Recursive,
48 48 }
49 49
50 50 pub trait Matcher: core::fmt::Debug {
51 51 /// Explicitly listed files
52 52 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
53 53 /// Returns whether `filename` is in `file_set`
54 54 fn exact_match(&self, filename: &HgPath) -> bool;
55 55 /// Returns whether `filename` is matched by this matcher
56 56 fn matches(&self, filename: &HgPath) -> bool;
57 57 /// Decides whether a directory should be visited based on whether it
58 58 /// has potential matches in it or one of its subdirectories, and
59 59 /// potentially lists which subdirectories of that directory should be
60 60 /// visited. This is based on the match's primary, included, and excluded
61 61 /// patterns.
62 62 ///
63 63 /// # Example
64 64 ///
65 65 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
66 66 /// return the following values (assuming the implementation of
67 67 /// visit_children_set is capable of recognizing this; some implementations
68 68 /// are not).
69 69 ///
70 70 /// ```text
71 71 /// ```ignore
72 72 /// '' -> {'foo', 'qux'}
73 73 /// 'baz' -> set()
74 74 /// 'foo' -> {'bar'}
75 75 /// // Ideally this would be `Recursive`, but since the prefix nature of
76 76 /// // matchers is applied to the entire matcher, we have to downgrade this
77 77 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
78 78 /// // `RootFilesIn'-kind matcher being mixed in.
79 79 /// 'foo/bar' -> 'this'
80 80 /// 'qux' -> 'this'
81 81 /// ```
82 82 /// # Important
83 83 ///
84 84 /// Most matchers do not know if they're representing files or
85 85 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
86 86 /// file or a directory, so `visit_children_set('dir')` for most matchers
87 87 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
88 88 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
89 89 /// it may return `VisitChildrenSet::This`.
90 90 /// Do not rely on the return being a `HashSet` indicating that there are
91 91 /// no files in this dir to investigate (or equivalently that if there are
92 92 /// files to investigate in 'dir' that it will always return
93 93 /// `VisitChildrenSet::This`).
94 94 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
95 95 /// Matcher will match everything and `files_set()` will be empty:
96 96 /// optimization might be possible.
97 97 fn matches_everything(&self) -> bool;
98 98 /// Matcher will match exactly the files in `files_set()`: optimization
99 99 /// might be possible.
100 100 fn is_exact(&self) -> bool;
101 101 }
102 102
103 103 /// Matches everything.
104 104 ///```
105 105 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
106 106 ///
107 107 /// let matcher = AlwaysMatcher;
108 108 ///
109 109 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
110 110 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
111 111 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
112 112 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
113 113 /// ```
114 114 #[derive(Debug)]
115 115 pub struct AlwaysMatcher;
116 116
117 117 impl Matcher for AlwaysMatcher {
118 118 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
119 119 None
120 120 }
121 121 fn exact_match(&self, _filename: &HgPath) -> bool {
122 122 false
123 123 }
124 124 fn matches(&self, _filename: &HgPath) -> bool {
125 125 true
126 126 }
127 127 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
128 128 VisitChildrenSet::Recursive
129 129 }
130 130 fn matches_everything(&self) -> bool {
131 131 true
132 132 }
133 133 fn is_exact(&self) -> bool {
134 134 false
135 135 }
136 136 }
137 137
138 138 /// Matches nothing.
139 139 #[derive(Debug)]
140 140 pub struct NeverMatcher;
141 141
142 142 impl Matcher for NeverMatcher {
143 143 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
144 144 None
145 145 }
146 146 fn exact_match(&self, _filename: &HgPath) -> bool {
147 147 false
148 148 }
149 149 fn matches(&self, _filename: &HgPath) -> bool {
150 150 false
151 151 }
152 152 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
153 153 VisitChildrenSet::Empty
154 154 }
155 155 fn matches_everything(&self) -> bool {
156 156 false
157 157 }
158 158 fn is_exact(&self) -> bool {
159 159 true
160 160 }
161 161 }
162 162
163 163 /// Matches the input files exactly. They are interpreted as paths, not
164 164 /// patterns.
165 165 ///
166 166 ///```
167 167 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
168 168 ///
169 169 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
170 170 /// let matcher = FileMatcher::new(files).unwrap();
171 171 ///
172 172 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
173 173 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
174 174 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
175 175 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
176 176 /// ```
177 177 #[derive(Debug)]
178 178 pub struct FileMatcher {
179 179 files: HashSet<HgPathBuf>,
180 180 dirs: DirsMultiset,
181 181 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
182 182 }
183 183
184 184 impl FileMatcher {
185 185 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
186 186 let dirs = DirsMultiset::from_manifest(&files)?;
187 187 Ok(Self {
188 188 files: HashSet::from_iter(files),
189 189 dirs,
190 190 sorted_visitchildrenset_candidates: OnceCell::new(),
191 191 })
192 192 }
193 193 fn inner_matches(&self, filename: &HgPath) -> bool {
194 194 self.files.contains(filename.as_ref())
195 195 }
196 196 }
197 197
198 198 impl Matcher for FileMatcher {
199 199 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
200 200 Some(&self.files)
201 201 }
202 202 fn exact_match(&self, filename: &HgPath) -> bool {
203 203 self.inner_matches(filename)
204 204 }
205 205 fn matches(&self, filename: &HgPath) -> bool {
206 206 self.inner_matches(filename)
207 207 }
208 208 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
209 209 if self.files.is_empty() || !self.dirs.contains(directory) {
210 210 return VisitChildrenSet::Empty;
211 211 }
212 212
213 213 let compute_candidates = || -> BTreeSet<HgPathBuf> {
214 214 let mut candidates: BTreeSet<HgPathBuf> =
215 215 self.dirs.iter().cloned().collect();
216 216 candidates.extend(self.files.iter().cloned());
217 217 candidates.remove(HgPath::new(b""));
218 218 candidates
219 219 };
220 220 let candidates =
221 221 if directory.as_ref().is_empty() {
222 222 compute_candidates()
223 223 } else {
224 224 let sorted_candidates = self
225 225 .sorted_visitchildrenset_candidates
226 226 .get_or_init(compute_candidates);
227 227 let directory_bytes = directory.as_ref().as_bytes();
228 228 let start: HgPathBuf =
229 229 format_bytes!(b"{}/", directory_bytes).into();
230 230 let start_len = start.len();
231 231 // `0` sorts after `/`
232 232 let end = format_bytes!(b"{}0", directory_bytes).into();
233 233 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
234 234 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
235 235 ))
236 236 };
237 237
238 238 // `self.dirs` includes all of the directories, recursively, so if
239 239 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
240 240 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
241 241 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
242 242 // subdir will be in there without a slash.
243 243 VisitChildrenSet::Set(
244 244 candidates
245 245 .into_iter()
246 246 .filter_map(|c| {
247 247 if c.bytes().all(|b| *b != b'/') {
248 248 Some(c)
249 249 } else {
250 250 None
251 251 }
252 252 })
253 253 .collect(),
254 254 )
255 255 }
256 256 fn matches_everything(&self) -> bool {
257 257 false
258 258 }
259 259 fn is_exact(&self) -> bool {
260 260 true
261 261 }
262 262 }
263 263
264 264 /// Matches a set of (kind, pat, source) against a 'root' directory.
265 265 /// (Currently the 'root' directory is effectively always empty)
266 266 /// ```
267 267 /// use hg::{
268 268 /// matchers::{PatternMatcher, Matcher},
269 269 /// filepatterns::{IgnorePattern, PatternSyntax},
270 270 /// utils::hg_path::{HgPath, HgPathBuf}
271 271 /// };
272 272 /// use std::collections::HashSet;
273 273 /// use std::path::Path;
274 274 /// ///
275 275 /// let ignore_patterns : Vec<IgnorePattern> =
276 276 /// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
277 277 /// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
278 278 /// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
279 279 /// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
280 280 /// ];
281 281 /// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
282 282 /// ///
283 283 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
284 284 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
285 285 /// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
286 286 /// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
287 287 /// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
288 288 /// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
289 289 /// assert_eq!(matcher.file_set().unwrap(),
290 290 /// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
291 291 /// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
292 292 /// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
293 293 /// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
294 294 /// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
295 295 /// ```
296 296 pub struct PatternMatcher<'a> {
297 297 patterns: Vec<u8>,
298 298 match_fn: IgnoreFnType<'a>,
299 299 /// Whether all the patterns match a prefix (i.e. recursively)
300 300 prefix: bool,
301 301 files: HashSet<HgPathBuf>,
302 302 dirs_explicit: HashSet<HgPathBuf>,
303 303 dirs: DirsMultiset,
304 304 }
305 305
306 306 impl core::fmt::Debug for PatternMatcher<'_> {
307 307 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
308 308 f.debug_struct("PatternMatcher")
309 309 .field("patterns", &String::from_utf8_lossy(&self.patterns))
310 310 .field("prefix", &self.prefix)
311 311 .field("files", &self.files)
312 312 .field("dirs", &self.dirs)
313 313 .finish()
314 314 }
315 315 }
316 316
317 317 impl<'a> PatternMatcher<'a> {
318 318 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
319 319 let RootsDirsAndParents {
320 320 roots,
321 321 dirs: dirs_explicit,
322 322 parents,
323 323 } = roots_dirs_and_parents(&ignore_patterns)?;
324 324 let files = roots;
325 325 let dirs = parents;
326 326 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
327 327
328 328 let prefix = ignore_patterns.iter().all(|k| {
329 329 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
330 330 });
331 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
331 let (patterns, match_fn) =
332 build_match(ignore_patterns, GlobSuffix::Empty)?;
332 333
333 334 Ok(Self {
334 335 patterns,
335 336 match_fn,
336 337 prefix,
337 338 files,
338 339 dirs,
339 340 dirs_explicit,
340 341 })
341 342 }
342 343 }
343 344
344 345 impl<'a> Matcher for PatternMatcher<'a> {
345 346 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
346 347 Some(&self.files)
347 348 }
348 349
349 350 fn exact_match(&self, filename: &HgPath) -> bool {
350 351 self.files.contains(filename)
351 352 }
352 353
353 354 fn matches(&self, filename: &HgPath) -> bool {
354 355 if self.files.contains(filename) {
355 356 return true;
356 357 }
357 358 (self.match_fn)(filename)
358 359 }
359 360
360 361 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
361 362 if self.prefix && self.files.contains(directory) {
362 363 return VisitChildrenSet::Recursive;
363 364 }
364 365 if self.dirs.contains(directory) {
365 366 return VisitChildrenSet::This;
366 367 }
367 368 if dir_ancestors(directory).any(|parent_dir| {
368 369 self.files.contains(parent_dir)
369 370 || self.dirs_explicit.contains(parent_dir)
370 371 }) {
371 372 VisitChildrenSet::This
372 373 } else {
373 374 VisitChildrenSet::Empty
374 375 }
375 376 }
376 377
377 378 fn matches_everything(&self) -> bool {
378 379 false
379 380 }
380 381
381 382 fn is_exact(&self) -> bool {
382 383 false
383 384 }
384 385 }
385 386
386 387 /// Matches files that are included in the ignore rules.
387 388 /// ```
388 389 /// use hg::{
389 390 /// matchers::{IncludeMatcher, Matcher},
390 391 /// filepatterns::{IgnorePattern, PatternSyntax},
391 392 /// utils::hg_path::HgPath
392 393 /// };
393 394 /// use std::path::Path;
394 395 /// ///
395 396 /// let ignore_patterns =
396 397 /// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
397 398 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
398 399 /// ///
399 400 /// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
400 401 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
401 402 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
402 403 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
403 404 /// ///
404 405 /// let ignore_patterns =
405 406 /// vec![IgnorePattern::new(PatternSyntax::RootFilesIn, b"dir/subdir", Path::new(""))];
406 407 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
407 408 /// ///
408 409 /// assert!(!matcher.matches(HgPath::new(b"file")));
409 410 /// assert!(!matcher.matches(HgPath::new(b"dir/file")));
410 411 /// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
411 412 /// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
412 413 /// ```
413 414 pub struct IncludeMatcher<'a> {
414 415 patterns: Vec<u8>,
415 416 match_fn: IgnoreFnType<'a>,
416 417 /// Whether all the patterns match a prefix (i.e. recursively)
417 418 prefix: bool,
418 419 roots: HashSet<HgPathBuf>,
419 420 dirs: HashSet<HgPathBuf>,
420 421 parents: DirsMultiset,
421 422 }
422 423
423 424 impl core::fmt::Debug for IncludeMatcher<'_> {
424 425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
425 426 f.debug_struct("IncludeMatcher")
426 427 .field("patterns", &String::from_utf8_lossy(&self.patterns))
427 428 .field("prefix", &self.prefix)
428 429 .field("roots", &self.roots)
429 430 .field("dirs", &self.dirs)
430 431 .field("parents", &self.parents)
431 432 .finish()
432 433 }
433 434 }
434 435
435 436 impl<'a> Matcher for IncludeMatcher<'a> {
436 437 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
437 438 None
438 439 }
439 440
440 441 fn exact_match(&self, _filename: &HgPath) -> bool {
441 442 false
442 443 }
443 444
444 445 fn matches(&self, filename: &HgPath) -> bool {
445 446 (self.match_fn)(filename)
446 447 }
447 448
448 449 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
449 450 let dir = directory;
450 451 if self.prefix && self.roots.contains(dir) {
451 452 return VisitChildrenSet::Recursive;
452 453 }
453 454 if self.roots.contains(HgPath::new(b""))
454 455 || self.roots.contains(dir)
455 456 || self.dirs.contains(dir)
456 457 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
457 458 {
458 459 return VisitChildrenSet::This;
459 460 }
460 461
461 462 if self.parents.contains(dir.as_ref()) {
462 463 let multiset = self.get_all_parents_children();
463 464 if let Some(children) = multiset.get(dir) {
464 465 return VisitChildrenSet::Set(
465 466 children.iter().map(HgPathBuf::from).collect(),
466 467 );
467 468 }
468 469 }
469 470 VisitChildrenSet::Empty
470 471 }
471 472
472 473 fn matches_everything(&self) -> bool {
473 474 false
474 475 }
475 476
476 477 fn is_exact(&self) -> bool {
477 478 false
478 479 }
479 480 }
480 481
481 482 /// The union of multiple matchers. Will match if any of the matchers match.
482 483 #[derive(Debug)]
483 484 pub struct UnionMatcher {
484 485 matchers: Vec<Box<dyn Matcher + Sync>>,
485 486 }
486 487
487 488 impl Matcher for UnionMatcher {
488 489 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
489 490 None
490 491 }
491 492
492 493 fn exact_match(&self, _filename: &HgPath) -> bool {
493 494 false
494 495 }
495 496
496 497 fn matches(&self, filename: &HgPath) -> bool {
497 498 self.matchers.iter().any(|m| m.matches(filename))
498 499 }
499 500
500 501 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
501 502 let mut result = HashSet::new();
502 503 let mut this = false;
503 504 for matcher in self.matchers.iter() {
504 505 let visit = matcher.visit_children_set(directory);
505 506 match visit {
506 507 VisitChildrenSet::Empty => continue,
507 508 VisitChildrenSet::This => {
508 509 this = true;
509 510 // Don't break, we might have an 'all' in here.
510 511 continue;
511 512 }
512 513 VisitChildrenSet::Set(set) => {
513 514 result.extend(set);
514 515 }
515 516 VisitChildrenSet::Recursive => {
516 517 return visit;
517 518 }
518 519 }
519 520 }
520 521 if this {
521 522 return VisitChildrenSet::This;
522 523 }
523 524 if result.is_empty() {
524 525 VisitChildrenSet::Empty
525 526 } else {
526 527 VisitChildrenSet::Set(result)
527 528 }
528 529 }
529 530
530 531 fn matches_everything(&self) -> bool {
531 532 // TODO Maybe if all are AlwaysMatcher?
532 533 false
533 534 }
534 535
535 536 fn is_exact(&self) -> bool {
536 537 false
537 538 }
538 539 }
539 540
540 541 impl UnionMatcher {
541 542 pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
542 543 Self { matchers }
543 544 }
544 545 }
545 546
546 547 #[derive(Debug)]
547 548 pub struct IntersectionMatcher {
548 549 m1: Box<dyn Matcher + Sync>,
549 550 m2: Box<dyn Matcher + Sync>,
550 551 files: Option<HashSet<HgPathBuf>>,
551 552 }
552 553
553 554 impl Matcher for IntersectionMatcher {
554 555 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
555 556 self.files.as_ref()
556 557 }
557 558
558 559 fn exact_match(&self, filename: &HgPath) -> bool {
559 560 self.files.as_ref().map_or(false, |f| f.contains(filename))
560 561 }
561 562
562 563 fn matches(&self, filename: &HgPath) -> bool {
563 564 self.m1.matches(filename) && self.m2.matches(filename)
564 565 }
565 566
566 567 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
567 568 let m1_set = self.m1.visit_children_set(directory);
568 569 if m1_set == VisitChildrenSet::Empty {
569 570 return VisitChildrenSet::Empty;
570 571 }
571 572 let m2_set = self.m2.visit_children_set(directory);
572 573 if m2_set == VisitChildrenSet::Empty {
573 574 return VisitChildrenSet::Empty;
574 575 }
575 576
576 577 if m1_set == VisitChildrenSet::Recursive {
577 578 return m2_set;
578 579 } else if m2_set == VisitChildrenSet::Recursive {
579 580 return m1_set;
580 581 }
581 582
582 583 match (&m1_set, &m2_set) {
583 584 (VisitChildrenSet::Recursive, _) => m2_set,
584 585 (_, VisitChildrenSet::Recursive) => m1_set,
585 586 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
586 587 VisitChildrenSet::This
587 588 }
588 589 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
589 590 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
590 591 if set.is_empty() {
591 592 VisitChildrenSet::Empty
592 593 } else {
593 594 VisitChildrenSet::Set(set)
594 595 }
595 596 }
596 597 _ => unreachable!(),
597 598 }
598 599 }
599 600
600 601 fn matches_everything(&self) -> bool {
601 602 self.m1.matches_everything() && self.m2.matches_everything()
602 603 }
603 604
604 605 fn is_exact(&self) -> bool {
605 606 self.m1.is_exact() || self.m2.is_exact()
606 607 }
607 608 }
608 609
609 610 impl IntersectionMatcher {
610 611 pub fn new(
611 612 mut m1: Box<dyn Matcher + Sync>,
612 613 mut m2: Box<dyn Matcher + Sync>,
613 614 ) -> Self {
614 615 let files = if m1.is_exact() || m2.is_exact() {
615 616 if !m1.is_exact() {
616 617 std::mem::swap(&mut m1, &mut m2);
617 618 }
618 619 m1.file_set().map(|m1_files| {
619 620 m1_files
620 621 .iter()
621 622 .filter(|&f| m2.matches(f))
622 623 .cloned()
623 624 .collect()
624 625 })
625 626 } else {
626 627 // without exact input file sets, we can't do an exact
627 628 // intersection, so we must over-approximate by
628 629 // unioning instead
629 630 m1.file_set().map(|m1_files| match m2.file_set() {
630 631 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
631 632 None => m1_files.iter().cloned().collect(),
632 633 })
633 634 };
634 635 Self { m1, m2, files }
635 636 }
636 637 }
637 638
638 639 #[derive(Debug)]
639 640 pub struct DifferenceMatcher {
640 641 base: Box<dyn Matcher + Sync>,
641 642 excluded: Box<dyn Matcher + Sync>,
642 643 files: Option<HashSet<HgPathBuf>>,
643 644 }
644 645
645 646 impl Matcher for DifferenceMatcher {
646 647 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
647 648 self.files.as_ref()
648 649 }
649 650
650 651 fn exact_match(&self, filename: &HgPath) -> bool {
651 652 self.files.as_ref().map_or(false, |f| f.contains(filename))
652 653 }
653 654
654 655 fn matches(&self, filename: &HgPath) -> bool {
655 656 self.base.matches(filename) && !self.excluded.matches(filename)
656 657 }
657 658
658 659 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
659 660 let excluded_set = self.excluded.visit_children_set(directory);
660 661 if excluded_set == VisitChildrenSet::Recursive {
661 662 return VisitChildrenSet::Empty;
662 663 }
663 664 let base_set = self.base.visit_children_set(directory);
664 665 // Possible values for base: 'recursive', 'this', set(...), set()
665 666 // Possible values for excluded: 'this', set(...), set()
666 667 // If excluded has nothing under here that we care about, return base,
667 668 // even if it's 'recursive'.
668 669 if excluded_set == VisitChildrenSet::Empty {
669 670 return base_set;
670 671 }
671 672 match base_set {
672 673 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
673 674 // Never return 'recursive' here if excluded_set is any kind of
674 675 // non-empty (either 'this' or set(foo)), since excluded might
675 676 // return set() for a subdirectory.
676 677 VisitChildrenSet::This
677 678 }
678 679 set => {
679 680 // Possible values for base: set(...), set()
680 681 // Possible values for excluded: 'this', set(...)
681 682 // We ignore excluded set results. They're possibly incorrect:
682 683 // base = path:dir/subdir
683 684 // excluded=rootfilesin:dir,
684 685 // visit_children_set(''):
685 686 // base returns {'dir'}, excluded returns {'dir'}, if we
686 687 // subtracted we'd return set(), which is *not* correct, we
687 688 // still need to visit 'dir'!
688 689 set
689 690 }
690 691 }
691 692 }
692 693
693 694 fn matches_everything(&self) -> bool {
694 695 false
695 696 }
696 697
697 698 fn is_exact(&self) -> bool {
698 699 self.base.is_exact()
699 700 }
700 701 }
701 702
702 703 impl DifferenceMatcher {
703 704 pub fn new(
704 705 base: Box<dyn Matcher + Sync>,
705 706 excluded: Box<dyn Matcher + Sync>,
706 707 ) -> Self {
707 708 let base_is_exact = base.is_exact();
708 709 let base_files = base.file_set().map(ToOwned::to_owned);
709 710 let mut new = Self {
710 711 base,
711 712 excluded,
712 713 files: None,
713 714 };
714 715 if base_is_exact {
715 716 new.files = base_files.map(|files| {
716 717 files.iter().filter(|&f| new.matches(f)).cloned().collect()
717 718 });
718 719 }
719 720 new
720 721 }
721 722 }
722 723
723 724 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
724 725 /// contexts.
725 726 ///
726 727 /// The `status` algorithm makes heavy use of threads, and calling `is_match`
727 728 /// from many threads at once is prone to contention, probably within the
728 729 /// scratch space needed as the regex DFA is built lazily.
729 730 ///
730 731 /// We are in the process of raising the issue upstream, but for now
731 732 /// the workaround used here is to store the `Regex` in a lazily populated
732 733 /// thread-local variable, sharing the initial read-only compilation, but
733 734 /// not the lazy dfa scratch space mentioned above.
734 735 ///
735 736 /// This reduces the contention observed with 16+ threads, but does not
736 737 /// completely remove it. Hopefully this can be addressed upstream.
737 738 struct RegexMatcher {
738 739 /// Compiled at the start of the status algorithm, used as a base for
739 740 /// cloning in each thread-local `self.local`, thus sharing the expensive
740 741 /// first compilation.
741 742 base: regex::bytes::Regex,
742 743 /// Thread-local variable that holds the `Regex` that is actually queried
743 744 /// from each thread.
744 745 local: thread_local::ThreadLocal<regex::bytes::Regex>,
745 746 }
746 747
747 748 impl RegexMatcher {
748 749 /// Returns whether the path matches the stored `Regex`.
749 750 pub fn is_match(&self, path: &HgPath) -> bool {
750 751 self.local
751 752 .get_or(|| self.base.clone())
752 753 .is_match(path.as_bytes())
753 754 }
754 755 }
755 756
756 757 /// Return a `RegexBuilder` from a bytes pattern
757 758 ///
758 759 /// This works around the fact that even if it works on byte haysacks,
759 760 /// [`regex::bytes::Regex`] still uses UTF-8 patterns.
760 761 pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder {
761 762 use std::io::Write;
762 763
763 764 // The `regex` crate adds `.*` to the start and end of expressions if there
764 765 // are no anchors, so add the start anchor.
765 766 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
766 767 for byte in pattern {
767 768 if *byte > 127 {
768 769 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
769 770 } else {
770 771 escaped_bytes.push(*byte);
771 772 }
772 773 }
773 774 escaped_bytes.push(b')');
774 775
775 776 // Avoid the cost of UTF8 checking
776 777 //
777 778 // # Safety
778 779 // This is safe because we escaped all non-ASCII bytes.
779 780 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
780 781 regex::bytes::RegexBuilder::new(&pattern_string)
781 782 }
782 783
783 784 /// Returns a function that matches an `HgPath` against the given regex
784 785 /// pattern.
785 786 ///
786 787 /// This can fail when the pattern is invalid or not supported by the
787 788 /// underlying engine (the `regex` crate), for instance anything with
788 789 /// back-references.
789 790 #[logging_timer::time("trace")]
790 791 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
791 792 let re = re_bytes_builder(pattern)
792 793 .unicode(false)
793 794 // Big repos with big `.hgignore` will hit the default limit and
794 795 // incur a significant performance hit. One repo's `hg status` hit
795 796 // multiple *minutes*.
796 797 .dfa_size_limit(50 * (1 << 20))
797 798 .build()
798 799 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
799 800
800 801 Ok(RegexMatcher {
801 802 base: re,
802 803 local: Default::default(),
803 804 })
804 805 }
805 806
806 807 /// Returns the regex pattern and a function that matches an `HgPath` against
807 808 /// said regex formed by the given ignore patterns.
808 809 fn build_regex_match<'a>(
809 810 ignore_patterns: &[IgnorePattern],
810 glob_suffix: &[u8],
811 glob_suffix: GlobSuffix,
811 812 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
812 813 let mut regexps = vec![];
813 814 let mut exact_set = HashSet::new();
814 815
815 816 for pattern in ignore_patterns {
816 817 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
817 818 regexps.push(re);
818 819 } else {
819 820 let exact = normalize_path_bytes(&pattern.pattern);
820 821 exact_set.insert(HgPathBuf::from_bytes(&exact));
821 822 }
822 823 }
823 824
824 825 let full_regex = regexps.join(&b'|');
825 826
826 827 // An empty pattern would cause the regex engine to incorrectly match the
827 828 // (empty) root directory
828 829 let func = if !(regexps.is_empty()) {
829 830 let matcher = re_matcher(&full_regex)?;
830 831 let func = move |filename: &HgPath| {
831 832 exact_set.contains(filename) || matcher.is_match(filename)
832 833 };
833 834 Box::new(func) as IgnoreFnType
834 835 } else {
835 836 let func = move |filename: &HgPath| exact_set.contains(filename);
836 837 Box::new(func) as IgnoreFnType
837 838 };
838 839
839 840 Ok((full_regex, func))
840 841 }
841 842
842 843 /// Returns roots and directories corresponding to each pattern.
843 844 ///
844 845 /// This calculates the roots and directories exactly matching the patterns and
845 846 /// returns a tuple of (roots, dirs). It does not return other directories
846 847 /// which may also need to be considered, like the parent directories.
847 848 fn roots_and_dirs(
848 849 ignore_patterns: &[IgnorePattern],
849 850 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
850 851 let mut roots = Vec::new();
851 852 let mut dirs = Vec::new();
852 853
853 854 for ignore_pattern in ignore_patterns {
854 855 let IgnorePattern {
855 856 syntax, pattern, ..
856 857 } = ignore_pattern;
857 858 match syntax {
858 859 PatternSyntax::RootGlob | PatternSyntax::Glob => {
859 860 let mut root = HgPathBuf::new();
860 861 for p in pattern.split(|c| *c == b'/') {
861 862 if p.iter()
862 863 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
863 864 {
864 865 break;
865 866 }
866 867 root.push(HgPathBuf::from_bytes(p).as_ref());
867 868 }
868 869 roots.push(root);
869 870 }
870 871 PatternSyntax::Path
871 872 | PatternSyntax::RelPath
872 873 | PatternSyntax::FilePath => {
873 874 let pat = HgPath::new(if pattern == b"." {
874 875 &[] as &[u8]
875 876 } else {
876 877 pattern
877 878 });
878 879 roots.push(pat.to_owned());
879 880 }
880 881 PatternSyntax::RootFilesIn => {
881 882 let pat = if pattern == b"." {
882 883 &[] as &[u8]
883 884 } else {
884 885 pattern
885 886 };
886 887 dirs.push(HgPathBuf::from_bytes(pat));
887 888 }
888 889 _ => {
889 890 roots.push(HgPathBuf::new());
890 891 }
891 892 }
892 893 }
893 894 (roots, dirs)
894 895 }
895 896
896 897 /// Paths extracted from patterns
897 898 #[derive(Debug, PartialEq)]
898 899 struct RootsDirsAndParents {
899 900 /// Directories to match recursively
900 901 pub roots: HashSet<HgPathBuf>,
901 902 /// Directories to match non-recursively
902 903 pub dirs: HashSet<HgPathBuf>,
903 904 /// Implicitly required directories to go to items in either roots or dirs
904 905 pub parents: DirsMultiset,
905 906 }
906 907
907 908 /// Extract roots, dirs and parents from patterns.
908 909 fn roots_dirs_and_parents(
909 910 ignore_patterns: &[IgnorePattern],
910 911 ) -> PatternResult<RootsDirsAndParents> {
911 912 let (roots, dirs) = roots_and_dirs(ignore_patterns);
912 913
913 914 let mut parents = DirsMultiset::from_manifest(&dirs)?;
914 915
915 916 for path in &roots {
916 917 parents.add_path(path)?
917 918 }
918 919
919 920 Ok(RootsDirsAndParents {
920 921 roots: HashSet::from_iter(roots),
921 922 dirs: HashSet::from_iter(dirs),
922 923 parents,
923 924 })
924 925 }
925 926
926 927 /// Returns a function that checks whether a given file (in the general sense)
927 928 /// should be matched.
928 929 fn build_match<'a>(
929 930 ignore_patterns: Vec<IgnorePattern>,
930 glob_suffix: &[u8],
931 glob_suffix: GlobSuffix,
931 932 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
932 933 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
933 934 // For debugging and printing
934 935 let mut patterns = vec![];
935 936
936 937 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
937 938
938 939 if !subincludes.is_empty() {
939 940 // Build prefix-based matcher functions for subincludes
940 941 let mut submatchers = FastHashMap::default();
941 942 let mut prefixes = vec![];
942 943
943 944 for sub_include in subincludes {
944 945 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
945 946 let match_fn =
946 947 Box::new(move |path: &HgPath| matcher.matches(path));
947 948 prefixes.push(sub_include.prefix.clone());
948 949 submatchers.insert(sub_include.prefix.clone(), match_fn);
949 950 }
950 951
951 952 let match_subinclude = move |filename: &HgPath| {
952 953 for prefix in prefixes.iter() {
953 954 if let Some(rel) = filename.relative_to(prefix) {
954 955 if (submatchers[prefix])(rel) {
955 956 return true;
956 957 }
957 958 }
958 959 }
959 960 false
960 961 };
961 962
962 963 match_funcs.push(Box::new(match_subinclude));
963 964 }
964 965
965 966 if !ignore_patterns.is_empty() {
966 967 // Either do dumb matching if all patterns are rootfiles, or match
967 968 // with a regex.
968 969 if ignore_patterns
969 970 .iter()
970 971 .all(|k| k.syntax == PatternSyntax::RootFilesIn)
971 972 {
972 973 let dirs: HashSet<_> = ignore_patterns
973 974 .iter()
974 975 .map(|k| k.pattern.to_owned())
975 976 .collect();
976 977 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
977 978
978 979 let match_func = move |path: &HgPath| -> bool {
979 980 let path = path.as_bytes();
980 981 let i = path.iter().rposition(|a| *a == b'/');
981 982 let dir = if let Some(i) = i { &path[..i] } else { b"." };
982 983 dirs.contains(dir)
983 984 };
984 985 match_funcs.push(Box::new(match_func));
985 986
986 987 patterns.extend(b"rootfilesin: ");
987 988 dirs_vec.sort();
988 989 patterns.extend(dirs_vec.escaped_bytes());
989 990 } else {
990 991 let (new_re, match_func) =
991 992 build_regex_match(&ignore_patterns, glob_suffix)?;
992 993 patterns = new_re;
993 994 match_funcs.push(match_func)
994 995 }
995 996 }
996 997
997 998 Ok(if match_funcs.len() == 1 {
998 999 (patterns, match_funcs.remove(0))
999 1000 } else {
1000 1001 (
1001 1002 patterns,
1002 1003 Box::new(move |f: &HgPath| -> bool {
1003 1004 match_funcs.iter().any(|match_func| match_func(f))
1004 1005 }),
1005 1006 )
1006 1007 })
1007 1008 }
1008 1009
1009 1010 /// Parses all "ignore" files with their recursive includes and returns a
1010 1011 /// function that checks whether a given file (in the general sense) should be
1011 1012 /// ignored.
1012 1013 pub fn get_ignore_matcher<'a>(
1013 1014 mut all_pattern_files: Vec<PathBuf>,
1014 1015 root_dir: &Path,
1015 1016 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1016 1017 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
1017 1018 let mut all_patterns = vec![];
1018 1019 let mut all_warnings = vec![];
1019 1020
1020 1021 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1021 1022 // deterministic even if the ordering of `all_pattern_files` is not (such
1022 1023 // as when a iteration order of a Python dict or Rust HashMap is involved).
1023 1024 // Sort by "string" representation instead of the default by component
1024 1025 // (with a Rust-specific definition of a component)
1025 1026 all_pattern_files
1026 1027 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1027 1028
1028 1029 for pattern_file in &all_pattern_files {
1029 1030 let (patterns, warnings) = get_patterns_from_file(
1030 1031 pattern_file,
1031 1032 root_dir,
1032 1033 inspect_pattern_bytes,
1033 1034 )?;
1034 1035
1035 1036 all_patterns.extend(patterns.to_owned());
1036 1037 all_warnings.extend(warnings);
1037 1038 }
1038 1039 let matcher = IncludeMatcher::new(all_patterns)?;
1039 1040 Ok((matcher, all_warnings))
1040 1041 }
1041 1042
1042 1043 /// Parses all "ignore" files with their recursive includes and returns a
1043 1044 /// function that checks whether a given file (in the general sense) should be
1044 1045 /// ignored.
1045 1046 pub fn get_ignore_function<'a>(
1046 1047 all_pattern_files: Vec<PathBuf>,
1047 1048 root_dir: &Path,
1048 1049 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1049 1050 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1050 1051 let res =
1051 1052 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1052 1053 res.map(|(matcher, all_warnings)| {
1053 1054 let res: IgnoreFnType<'a> =
1054 1055 Box::new(move |path: &HgPath| matcher.matches(path));
1055 1056
1056 1057 (res, all_warnings)
1057 1058 })
1058 1059 }
1059 1060
1060 1061 impl<'a> IncludeMatcher<'a> {
1061 1062 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1062 1063 let RootsDirsAndParents {
1063 1064 roots,
1064 1065 dirs,
1065 1066 parents,
1066 1067 } = roots_dirs_and_parents(&ignore_patterns)?;
1067 1068 let prefix = ignore_patterns.iter().all(|k| {
1068 1069 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1069 1070 });
1070 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1071 let (patterns, match_fn) =
1072 build_match(ignore_patterns, GlobSuffix::MoreComponents)?;
1071 1073
1072 1074 Ok(Self {
1073 1075 patterns,
1074 1076 match_fn,
1075 1077 prefix,
1076 1078 roots,
1077 1079 dirs,
1078 1080 parents,
1079 1081 })
1080 1082 }
1081 1083
1082 1084 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1083 1085 // TODO cache
1084 1086 let thing = self
1085 1087 .dirs
1086 1088 .iter()
1087 1089 .chain(self.roots.iter())
1088 1090 .chain(self.parents.iter());
1089 1091 DirsChildrenMultiset::new(thing, Some(self.parents.iter()))
1090 1092 }
1091 1093
1092 1094 pub fn debug_get_patterns(&self) -> &[u8] {
1093 1095 self.patterns.as_ref()
1094 1096 }
1095 1097 }
1096 1098
1097 1099 impl<'a> Display for IncludeMatcher<'a> {
1098 1100 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1099 1101 // XXX What about exact matches?
1100 1102 // I'm not sure it's worth it to clone the HashSet and keep it
1101 1103 // around just in case someone wants to display the matcher, plus
1102 1104 // it's going to be unreadable after a few entries, but we need to
1103 1105 // inform in this display that exact matches are being used and are
1104 1106 // (on purpose) missing from the `includes`.
1105 1107 write!(
1106 1108 f,
1107 1109 "IncludeMatcher(includes='{}')",
1108 1110 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1109 1111 )
1110 1112 }
1111 1113 }
1112 1114
1113 1115 #[cfg(test)]
1114 1116 mod tests {
1115 1117 use super::*;
1116 1118 use pretty_assertions::assert_eq;
1117 1119 use std::collections::BTreeMap;
1118 1120 use std::collections::BTreeSet;
1119 1121 use std::fmt::Debug;
1120 1122 use std::path::Path;
1121 1123
1122 1124 #[test]
1123 1125 fn test_roots_and_dirs() {
1124 1126 let pats = vec![
1125 1127 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1126 1128 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1127 1129 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1128 1130 ];
1129 1131 let (roots, dirs) = roots_and_dirs(&pats);
1130 1132
1131 1133 assert_eq!(
1132 1134 roots,
1133 1135 vec!(
1134 1136 HgPathBuf::from_bytes(b"g/h"),
1135 1137 HgPathBuf::from_bytes(b"g/h"),
1136 1138 HgPathBuf::new()
1137 1139 ),
1138 1140 );
1139 1141 assert_eq!(dirs, vec!());
1140 1142 }
1141 1143
1142 1144 #[test]
1143 1145 fn test_roots_dirs_and_parents() {
1144 1146 let pats = vec![
1145 1147 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1146 1148 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1147 1149 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1148 1150 ];
1149 1151
1150 1152 let mut roots = HashSet::new();
1151 1153 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1152 1154 roots.insert(HgPathBuf::new());
1153 1155
1154 1156 let dirs = HashSet::new();
1155 1157
1156 1158 let parents = DirsMultiset::from_manifest(&[
1157 1159 HgPathBuf::from_bytes(b"x"),
1158 1160 HgPathBuf::from_bytes(b"g/x"),
1159 1161 HgPathBuf::from_bytes(b"g/y"),
1160 1162 ])
1161 1163 .unwrap();
1162 1164
1163 1165 assert_eq!(
1164 1166 roots_dirs_and_parents(&pats).unwrap(),
1165 1167 RootsDirsAndParents {
1166 1168 roots,
1167 1169 dirs,
1168 1170 parents
1169 1171 }
1170 1172 );
1171 1173 }
1172 1174
1173 1175 #[test]
1174 1176 fn test_filematcher_visit_children_set() {
1175 1177 // Visitchildrenset
1176 1178 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1177 1179 let matcher = FileMatcher::new(files).unwrap();
1178 1180
1179 1181 let mut set = HashSet::new();
1180 1182 set.insert(HgPathBuf::from_bytes(b"dir"));
1181 1183 assert_eq!(
1182 1184 matcher.visit_children_set(HgPath::new(b"")),
1183 1185 VisitChildrenSet::Set(set)
1184 1186 );
1185 1187
1186 1188 let mut set = HashSet::new();
1187 1189 set.insert(HgPathBuf::from_bytes(b"subdir"));
1188 1190 assert_eq!(
1189 1191 matcher.visit_children_set(HgPath::new(b"dir")),
1190 1192 VisitChildrenSet::Set(set)
1191 1193 );
1192 1194
1193 1195 let mut set = HashSet::new();
1194 1196 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1195 1197 assert_eq!(
1196 1198 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1197 1199 VisitChildrenSet::Set(set)
1198 1200 );
1199 1201
1200 1202 assert_eq!(
1201 1203 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1202 1204 VisitChildrenSet::Empty
1203 1205 );
1204 1206 assert_eq!(
1205 1207 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1206 1208 VisitChildrenSet::Empty
1207 1209 );
1208 1210 assert_eq!(
1209 1211 matcher.visit_children_set(HgPath::new(b"folder")),
1210 1212 VisitChildrenSet::Empty
1211 1213 );
1212 1214 }
1213 1215
1214 1216 #[test]
1215 1217 fn test_filematcher_visit_children_set_files_and_dirs() {
1216 1218 let files = vec![
1217 1219 HgPathBuf::from_bytes(b"rootfile.txt"),
1218 1220 HgPathBuf::from_bytes(b"a/file1.txt"),
1219 1221 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1220 1222 // No file in a/b/c
1221 1223 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1222 1224 ];
1223 1225 let matcher = FileMatcher::new(files).unwrap();
1224 1226
1225 1227 let mut set = HashSet::new();
1226 1228 set.insert(HgPathBuf::from_bytes(b"a"));
1227 1229 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1228 1230 assert_eq!(
1229 1231 matcher.visit_children_set(HgPath::new(b"")),
1230 1232 VisitChildrenSet::Set(set)
1231 1233 );
1232 1234
1233 1235 let mut set = HashSet::new();
1234 1236 set.insert(HgPathBuf::from_bytes(b"b"));
1235 1237 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1236 1238 assert_eq!(
1237 1239 matcher.visit_children_set(HgPath::new(b"a")),
1238 1240 VisitChildrenSet::Set(set)
1239 1241 );
1240 1242
1241 1243 let mut set = HashSet::new();
1242 1244 set.insert(HgPathBuf::from_bytes(b"c"));
1243 1245 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1244 1246 assert_eq!(
1245 1247 matcher.visit_children_set(HgPath::new(b"a/b")),
1246 1248 VisitChildrenSet::Set(set)
1247 1249 );
1248 1250
1249 1251 let mut set = HashSet::new();
1250 1252 set.insert(HgPathBuf::from_bytes(b"d"));
1251 1253 assert_eq!(
1252 1254 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1253 1255 VisitChildrenSet::Set(set)
1254 1256 );
1255 1257 let mut set = HashSet::new();
1256 1258 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1257 1259 assert_eq!(
1258 1260 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1259 1261 VisitChildrenSet::Set(set)
1260 1262 );
1261 1263
1262 1264 assert_eq!(
1263 1265 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1264 1266 VisitChildrenSet::Empty
1265 1267 );
1266 1268 assert_eq!(
1267 1269 matcher.visit_children_set(HgPath::new(b"folder")),
1268 1270 VisitChildrenSet::Empty
1269 1271 );
1270 1272 }
1271 1273
1272 1274 #[test]
1273 1275 fn test_patternmatcher() {
1274 1276 // VisitdirPrefix
1275 1277 let m = PatternMatcher::new(vec![IgnorePattern::new(
1276 1278 PatternSyntax::Path,
1277 1279 b"dir/subdir",
1278 1280 Path::new(""),
1279 1281 )])
1280 1282 .unwrap();
1281 1283 assert_eq!(
1282 1284 m.visit_children_set(HgPath::new(b"")),
1283 1285 VisitChildrenSet::This
1284 1286 );
1285 1287 assert_eq!(
1286 1288 m.visit_children_set(HgPath::new(b"dir")),
1287 1289 VisitChildrenSet::This
1288 1290 );
1289 1291 assert_eq!(
1290 1292 m.visit_children_set(HgPath::new(b"dir/subdir")),
1291 1293 VisitChildrenSet::Recursive
1292 1294 );
1293 1295 // OPT: This should probably be Recursive if its parent is?
1294 1296 assert_eq!(
1295 1297 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1296 1298 VisitChildrenSet::This
1297 1299 );
1298 1300 assert_eq!(
1299 1301 m.visit_children_set(HgPath::new(b"folder")),
1300 1302 VisitChildrenSet::Empty
1301 1303 );
1302 1304
1303 1305 // VisitchildrensetPrefix
1304 1306 let m = PatternMatcher::new(vec![IgnorePattern::new(
1305 1307 PatternSyntax::Path,
1306 1308 b"dir/subdir",
1307 1309 Path::new(""),
1308 1310 )])
1309 1311 .unwrap();
1310 1312 assert_eq!(
1311 1313 m.visit_children_set(HgPath::new(b"")),
1312 1314 VisitChildrenSet::This
1313 1315 );
1314 1316 assert_eq!(
1315 1317 m.visit_children_set(HgPath::new(b"dir")),
1316 1318 VisitChildrenSet::This
1317 1319 );
1318 1320 assert_eq!(
1319 1321 m.visit_children_set(HgPath::new(b"dir/subdir")),
1320 1322 VisitChildrenSet::Recursive
1321 1323 );
1322 1324 // OPT: This should probably be Recursive if its parent is?
1323 1325 assert_eq!(
1324 1326 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1325 1327 VisitChildrenSet::This
1326 1328 );
1327 1329 assert_eq!(
1328 1330 m.visit_children_set(HgPath::new(b"folder")),
1329 1331 VisitChildrenSet::Empty
1330 1332 );
1331 1333
1332 1334 // VisitdirRootfilesin
1333 1335 let m = PatternMatcher::new(vec![IgnorePattern::new(
1334 1336 PatternSyntax::RootFilesIn,
1335 1337 b"dir/subdir",
1336 1338 Path::new(""),
1337 1339 )])
1338 1340 .unwrap();
1339 1341 assert_eq!(
1340 1342 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1341 1343 VisitChildrenSet::This
1342 1344 );
1343 1345 assert_eq!(
1344 1346 m.visit_children_set(HgPath::new(b"folder")),
1345 1347 VisitChildrenSet::Empty
1346 1348 );
1347 1349 assert_eq!(
1348 1350 m.visit_children_set(HgPath::new(b"")),
1349 1351 VisitChildrenSet::This
1350 1352 );
1351 1353 assert_eq!(
1352 1354 m.visit_children_set(HgPath::new(b"dir")),
1353 1355 VisitChildrenSet::This
1354 1356 );
1355 1357 assert_eq!(
1356 1358 m.visit_children_set(HgPath::new(b"dir/subdir")),
1357 1359 VisitChildrenSet::This
1358 1360 );
1359 1361
1360 1362 // VisitchildrensetRootfilesin
1361 1363 let m = PatternMatcher::new(vec![IgnorePattern::new(
1362 1364 PatternSyntax::RootFilesIn,
1363 1365 b"dir/subdir",
1364 1366 Path::new(""),
1365 1367 )])
1366 1368 .unwrap();
1367 1369 assert_eq!(
1368 1370 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1369 1371 VisitChildrenSet::This
1370 1372 );
1371 1373 assert_eq!(
1372 1374 m.visit_children_set(HgPath::new(b"folder")),
1373 1375 VisitChildrenSet::Empty
1374 1376 );
1375 1377 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1376 1378 // respectively
1377 1379 assert_eq!(
1378 1380 m.visit_children_set(HgPath::new(b"")),
1379 1381 VisitChildrenSet::This
1380 1382 );
1381 1383 assert_eq!(
1382 1384 m.visit_children_set(HgPath::new(b"dir")),
1383 1385 VisitChildrenSet::This
1384 1386 );
1385 1387 assert_eq!(
1386 1388 m.visit_children_set(HgPath::new(b"dir/subdir")),
1387 1389 VisitChildrenSet::This
1388 1390 );
1389 1391
1390 1392 // VisitdirGlob
1391 1393 let m = PatternMatcher::new(vec![IgnorePattern::new(
1392 1394 PatternSyntax::Glob,
1393 1395 b"dir/z*",
1394 1396 Path::new(""),
1395 1397 )])
1396 1398 .unwrap();
1397 1399 assert_eq!(
1398 1400 m.visit_children_set(HgPath::new(b"")),
1399 1401 VisitChildrenSet::This
1400 1402 );
1401 1403 assert_eq!(
1402 1404 m.visit_children_set(HgPath::new(b"dir")),
1403 1405 VisitChildrenSet::This
1404 1406 );
1405 1407 assert_eq!(
1406 1408 m.visit_children_set(HgPath::new(b"folder")),
1407 1409 VisitChildrenSet::Empty
1408 1410 );
1409 1411 // OPT: these should probably be False.
1410 1412 assert_eq!(
1411 1413 m.visit_children_set(HgPath::new(b"dir/subdir")),
1412 1414 VisitChildrenSet::This
1413 1415 );
1414 1416 assert_eq!(
1415 1417 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1416 1418 VisitChildrenSet::This
1417 1419 );
1418 1420
1419 1421 // VisitchildrensetGlob
1420 1422 let m = PatternMatcher::new(vec![IgnorePattern::new(
1421 1423 PatternSyntax::Glob,
1422 1424 b"dir/z*",
1423 1425 Path::new(""),
1424 1426 )])
1425 1427 .unwrap();
1426 1428 assert_eq!(
1427 1429 m.visit_children_set(HgPath::new(b"")),
1428 1430 VisitChildrenSet::This
1429 1431 );
1430 1432 assert_eq!(
1431 1433 m.visit_children_set(HgPath::new(b"folder")),
1432 1434 VisitChildrenSet::Empty
1433 1435 );
1434 1436 assert_eq!(
1435 1437 m.visit_children_set(HgPath::new(b"dir")),
1436 1438 VisitChildrenSet::This
1437 1439 );
1438 1440 // OPT: these should probably be Empty
1439 1441 assert_eq!(
1440 1442 m.visit_children_set(HgPath::new(b"dir/subdir")),
1441 1443 VisitChildrenSet::This
1442 1444 );
1443 1445 assert_eq!(
1444 1446 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1445 1447 VisitChildrenSet::This
1446 1448 );
1447 1449
1448 1450 // VisitdirFilepath
1449 1451 let m = PatternMatcher::new(vec![IgnorePattern::new(
1450 1452 PatternSyntax::FilePath,
1451 1453 b"dir/z",
1452 1454 Path::new(""),
1453 1455 )])
1454 1456 .unwrap();
1455 1457 assert_eq!(
1456 1458 m.visit_children_set(HgPath::new(b"")),
1457 1459 VisitChildrenSet::This
1458 1460 );
1459 1461 assert_eq!(
1460 1462 m.visit_children_set(HgPath::new(b"dir")),
1461 1463 VisitChildrenSet::This
1462 1464 );
1463 1465 assert_eq!(
1464 1466 m.visit_children_set(HgPath::new(b"folder")),
1465 1467 VisitChildrenSet::Empty
1466 1468 );
1467 1469 assert_eq!(
1468 1470 m.visit_children_set(HgPath::new(b"dir/subdir")),
1469 1471 VisitChildrenSet::Empty
1470 1472 );
1471 1473 assert_eq!(
1472 1474 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1473 1475 VisitChildrenSet::Empty
1474 1476 );
1475 1477
1476 1478 // VisitchildrensetFilepath
1477 1479 let m = PatternMatcher::new(vec![IgnorePattern::new(
1478 1480 PatternSyntax::FilePath,
1479 1481 b"dir/z",
1480 1482 Path::new(""),
1481 1483 )])
1482 1484 .unwrap();
1483 1485 assert_eq!(
1484 1486 m.visit_children_set(HgPath::new(b"")),
1485 1487 VisitChildrenSet::This
1486 1488 );
1487 1489 assert_eq!(
1488 1490 m.visit_children_set(HgPath::new(b"folder")),
1489 1491 VisitChildrenSet::Empty
1490 1492 );
1491 1493 assert_eq!(
1492 1494 m.visit_children_set(HgPath::new(b"dir")),
1493 1495 VisitChildrenSet::This
1494 1496 );
1495 1497 assert_eq!(
1496 1498 m.visit_children_set(HgPath::new(b"dir/subdir")),
1497 1499 VisitChildrenSet::Empty
1498 1500 );
1499 1501 assert_eq!(
1500 1502 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1501 1503 VisitChildrenSet::Empty
1502 1504 );
1503 1505 }
1504 1506
1505 1507 #[test]
1506 1508 fn test_includematcher() {
1507 1509 // VisitchildrensetPrefix
1508 1510 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1509 1511 PatternSyntax::RelPath,
1510 1512 b"dir/subdir",
1511 1513 Path::new(""),
1512 1514 )])
1513 1515 .unwrap();
1514 1516
1515 1517 let mut set = HashSet::new();
1516 1518 set.insert(HgPathBuf::from_bytes(b"dir"));
1517 1519 assert_eq!(
1518 1520 matcher.visit_children_set(HgPath::new(b"")),
1519 1521 VisitChildrenSet::Set(set)
1520 1522 );
1521 1523
1522 1524 let mut set = HashSet::new();
1523 1525 set.insert(HgPathBuf::from_bytes(b"subdir"));
1524 1526 assert_eq!(
1525 1527 matcher.visit_children_set(HgPath::new(b"dir")),
1526 1528 VisitChildrenSet::Set(set)
1527 1529 );
1528 1530 assert_eq!(
1529 1531 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1530 1532 VisitChildrenSet::Recursive
1531 1533 );
1532 1534 // OPT: This should probably be 'all' if its parent is?
1533 1535 assert_eq!(
1534 1536 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1535 1537 VisitChildrenSet::This
1536 1538 );
1537 1539 assert_eq!(
1538 1540 matcher.visit_children_set(HgPath::new(b"folder")),
1539 1541 VisitChildrenSet::Empty
1540 1542 );
1541 1543
1542 1544 // VisitchildrensetRootfilesin
1543 1545 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1544 1546 PatternSyntax::RootFilesIn,
1545 1547 b"dir/subdir",
1546 1548 Path::new(""),
1547 1549 )])
1548 1550 .unwrap();
1549 1551
1550 1552 let mut set = HashSet::new();
1551 1553 set.insert(HgPathBuf::from_bytes(b"dir"));
1552 1554 assert_eq!(
1553 1555 matcher.visit_children_set(HgPath::new(b"")),
1554 1556 VisitChildrenSet::Set(set)
1555 1557 );
1556 1558
1557 1559 let mut set = HashSet::new();
1558 1560 set.insert(HgPathBuf::from_bytes(b"subdir"));
1559 1561 assert_eq!(
1560 1562 matcher.visit_children_set(HgPath::new(b"dir")),
1561 1563 VisitChildrenSet::Set(set)
1562 1564 );
1563 1565
1564 1566 assert_eq!(
1565 1567 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1566 1568 VisitChildrenSet::This
1567 1569 );
1568 1570 assert_eq!(
1569 1571 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1570 1572 VisitChildrenSet::Empty
1571 1573 );
1572 1574 assert_eq!(
1573 1575 matcher.visit_children_set(HgPath::new(b"folder")),
1574 1576 VisitChildrenSet::Empty
1575 1577 );
1576 1578
1577 1579 // VisitchildrensetGlob
1578 1580 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1579 1581 PatternSyntax::Glob,
1580 1582 b"dir/z*",
1581 1583 Path::new(""),
1582 1584 )])
1583 1585 .unwrap();
1584 1586
1585 1587 let mut set = HashSet::new();
1586 1588 set.insert(HgPathBuf::from_bytes(b"dir"));
1587 1589 assert_eq!(
1588 1590 matcher.visit_children_set(HgPath::new(b"")),
1589 1591 VisitChildrenSet::Set(set)
1590 1592 );
1591 1593 assert_eq!(
1592 1594 matcher.visit_children_set(HgPath::new(b"folder")),
1593 1595 VisitChildrenSet::Empty
1594 1596 );
1595 1597 assert_eq!(
1596 1598 matcher.visit_children_set(HgPath::new(b"dir")),
1597 1599 VisitChildrenSet::This
1598 1600 );
1599 1601 // OPT: these should probably be set().
1600 1602 assert_eq!(
1601 1603 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1602 1604 VisitChildrenSet::This
1603 1605 );
1604 1606 assert_eq!(
1605 1607 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1606 1608 VisitChildrenSet::This
1607 1609 );
1608 1610
1609 1611 // VisitchildrensetFilePath
1610 1612 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1611 1613 PatternSyntax::FilePath,
1612 1614 b"dir/z",
1613 1615 Path::new(""),
1614 1616 )])
1615 1617 .unwrap();
1616 1618
1617 1619 let mut set = HashSet::new();
1618 1620 set.insert(HgPathBuf::from_bytes(b"dir"));
1619 1621 assert_eq!(
1620 1622 matcher.visit_children_set(HgPath::new(b"")),
1621 1623 VisitChildrenSet::Set(set)
1622 1624 );
1623 1625 assert_eq!(
1624 1626 matcher.visit_children_set(HgPath::new(b"folder")),
1625 1627 VisitChildrenSet::Empty
1626 1628 );
1627 1629 let mut set = HashSet::new();
1628 1630 set.insert(HgPathBuf::from_bytes(b"z"));
1629 1631 assert_eq!(
1630 1632 matcher.visit_children_set(HgPath::new(b"dir")),
1631 1633 VisitChildrenSet::Set(set)
1632 1634 );
1633 1635 // OPT: these should probably be set().
1634 1636 assert_eq!(
1635 1637 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1636 1638 VisitChildrenSet::Empty
1637 1639 );
1638 1640 assert_eq!(
1639 1641 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1640 1642 VisitChildrenSet::Empty
1641 1643 );
1642 1644
1643 1645 // Test multiple patterns
1644 1646 let matcher = IncludeMatcher::new(vec![
1645 1647 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1646 1648 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1647 1649 ])
1648 1650 .unwrap();
1649 1651
1650 1652 assert_eq!(
1651 1653 matcher.visit_children_set(HgPath::new(b"")),
1652 1654 VisitChildrenSet::This
1653 1655 );
1654 1656
1655 1657 // Test multiple patterns
1656 1658 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1657 1659 PatternSyntax::Glob,
1658 1660 b"**/*.exe",
1659 1661 Path::new(""),
1660 1662 )])
1661 1663 .unwrap();
1662 1664
1663 1665 assert_eq!(
1664 1666 matcher.visit_children_set(HgPath::new(b"")),
1665 1667 VisitChildrenSet::This
1666 1668 );
1667 1669 }
1668 1670
1669 1671 #[test]
1670 1672 fn test_unionmatcher() {
1671 1673 // Path + Rootfiles
1672 1674 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1673 1675 PatternSyntax::RelPath,
1674 1676 b"dir/subdir",
1675 1677 Path::new(""),
1676 1678 )])
1677 1679 .unwrap();
1678 1680 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1679 1681 PatternSyntax::RootFilesIn,
1680 1682 b"dir",
1681 1683 Path::new(""),
1682 1684 )])
1683 1685 .unwrap();
1684 1686 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1685 1687
1686 1688 let mut set = HashSet::new();
1687 1689 set.insert(HgPathBuf::from_bytes(b"dir"));
1688 1690 assert_eq!(
1689 1691 matcher.visit_children_set(HgPath::new(b"")),
1690 1692 VisitChildrenSet::Set(set)
1691 1693 );
1692 1694 assert_eq!(
1693 1695 matcher.visit_children_set(HgPath::new(b"dir")),
1694 1696 VisitChildrenSet::This
1695 1697 );
1696 1698 assert_eq!(
1697 1699 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1698 1700 VisitChildrenSet::Recursive
1699 1701 );
1700 1702 assert_eq!(
1701 1703 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1702 1704 VisitChildrenSet::Empty
1703 1705 );
1704 1706 assert_eq!(
1705 1707 matcher.visit_children_set(HgPath::new(b"folder")),
1706 1708 VisitChildrenSet::Empty
1707 1709 );
1708 1710 assert_eq!(
1709 1711 matcher.visit_children_set(HgPath::new(b"folder")),
1710 1712 VisitChildrenSet::Empty
1711 1713 );
1712 1714
1713 1715 // OPT: These next two could be 'all' instead of 'this'.
1714 1716 assert_eq!(
1715 1717 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1716 1718 VisitChildrenSet::This
1717 1719 );
1718 1720 assert_eq!(
1719 1721 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1720 1722 VisitChildrenSet::This
1721 1723 );
1722 1724
1723 1725 // Path + unrelated Path
1724 1726 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1725 1727 PatternSyntax::RelPath,
1726 1728 b"dir/subdir",
1727 1729 Path::new(""),
1728 1730 )])
1729 1731 .unwrap();
1730 1732 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1731 1733 PatternSyntax::RelPath,
1732 1734 b"folder",
1733 1735 Path::new(""),
1734 1736 )])
1735 1737 .unwrap();
1736 1738 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1737 1739
1738 1740 let mut set = HashSet::new();
1739 1741 set.insert(HgPathBuf::from_bytes(b"folder"));
1740 1742 set.insert(HgPathBuf::from_bytes(b"dir"));
1741 1743 assert_eq!(
1742 1744 matcher.visit_children_set(HgPath::new(b"")),
1743 1745 VisitChildrenSet::Set(set)
1744 1746 );
1745 1747 let mut set = HashSet::new();
1746 1748 set.insert(HgPathBuf::from_bytes(b"subdir"));
1747 1749 assert_eq!(
1748 1750 matcher.visit_children_set(HgPath::new(b"dir")),
1749 1751 VisitChildrenSet::Set(set)
1750 1752 );
1751 1753
1752 1754 assert_eq!(
1753 1755 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1754 1756 VisitChildrenSet::Recursive
1755 1757 );
1756 1758 assert_eq!(
1757 1759 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1758 1760 VisitChildrenSet::Empty
1759 1761 );
1760 1762
1761 1763 assert_eq!(
1762 1764 matcher.visit_children_set(HgPath::new(b"folder")),
1763 1765 VisitChildrenSet::Recursive
1764 1766 );
1765 1767 // OPT: These next two could be 'all' instead of 'this'.
1766 1768 assert_eq!(
1767 1769 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1768 1770 VisitChildrenSet::This
1769 1771 );
1770 1772 assert_eq!(
1771 1773 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1772 1774 VisitChildrenSet::This
1773 1775 );
1774 1776
1775 1777 // Path + subpath
1776 1778 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1777 1779 PatternSyntax::RelPath,
1778 1780 b"dir/subdir/x",
1779 1781 Path::new(""),
1780 1782 )])
1781 1783 .unwrap();
1782 1784 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1783 1785 PatternSyntax::RelPath,
1784 1786 b"dir/subdir",
1785 1787 Path::new(""),
1786 1788 )])
1787 1789 .unwrap();
1788 1790 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1789 1791
1790 1792 let mut set = HashSet::new();
1791 1793 set.insert(HgPathBuf::from_bytes(b"dir"));
1792 1794 assert_eq!(
1793 1795 matcher.visit_children_set(HgPath::new(b"")),
1794 1796 VisitChildrenSet::Set(set)
1795 1797 );
1796 1798 let mut set = HashSet::new();
1797 1799 set.insert(HgPathBuf::from_bytes(b"subdir"));
1798 1800 assert_eq!(
1799 1801 matcher.visit_children_set(HgPath::new(b"dir")),
1800 1802 VisitChildrenSet::Set(set)
1801 1803 );
1802 1804
1803 1805 assert_eq!(
1804 1806 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1805 1807 VisitChildrenSet::Recursive
1806 1808 );
1807 1809 assert_eq!(
1808 1810 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1809 1811 VisitChildrenSet::Empty
1810 1812 );
1811 1813
1812 1814 assert_eq!(
1813 1815 matcher.visit_children_set(HgPath::new(b"folder")),
1814 1816 VisitChildrenSet::Empty
1815 1817 );
1816 1818 assert_eq!(
1817 1819 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1818 1820 VisitChildrenSet::Recursive
1819 1821 );
1820 1822 // OPT: this should probably be 'all' not 'this'.
1821 1823 assert_eq!(
1822 1824 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1823 1825 VisitChildrenSet::This
1824 1826 );
1825 1827 }
1826 1828
#[test]
fn test_intersectionmatcher() {
    // Asks `matcher` which children of `dir` have to be visited.
    fn visit<M: Matcher>(matcher: &M, dir: &[u8]) -> VisitChildrenSet {
        matcher.visit_children_set(HgPath::new(dir))
    }

    // Expected answer when exactly one child, `child`, is to be visited.
    fn only(child: &[u8]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            std::iter::once(HgPathBuf::from_bytes(child)).collect(),
        )
    }

    // Boxed include matcher made of a single pattern, with
    // `Path::new("")` as the pattern's source.
    let include = |syntax: PatternSyntax, pattern: &[u8]| {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new(""),
            )])
            .unwrap(),
        )
    };

    // Include path + Include rootfiles
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir"),
        include(PatternSyntax::RootFilesIn, b"dir"),
    );

    assert_eq!(visit(&matcher, b""), only(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(&matcher, b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::Empty);

    // Non intersecting paths: nothing is ever worth visiting.
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir"),
        include(PatternSyntax::RelPath, b"folder"),
    );

    let every_dir: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ];
    for dir in every_dir.iter() {
        assert_eq!(visit(&matcher, dir), VisitChildrenSet::Empty);
    }

    // Nested paths
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir/x"),
        include(PatternSyntax::RelPath, b"dir/subdir"),
    );

    assert_eq!(visit(&matcher, b""), only(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), only(b"subdir"));
    assert_eq!(visit(&matcher, b"dir/subdir"), only(b"x"));
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::Empty);
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::This);

    // Diverging paths
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir/x"),
        include(PatternSyntax::RelPath, b"dir/subdir/z"),
    );

    // OPT: these next two could probably be Empty as well.
    assert_eq!(visit(&matcher, b""), only(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), only(b"subdir"));
    assert_eq!(visit(&matcher, b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::Empty);
}
2037 2039
#[test]
fn test_differencematcher() {
    // Asks `matcher` which children of `dir` have to be visited.
    fn visit<M: Matcher>(matcher: &M, dir: &[u8]) -> VisitChildrenSet {
        matcher.visit_children_set(HgPath::new(dir))
    }

    // Expected answer when exactly one child, `child`, is to be visited.
    fn only(child: &[u8]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            std::iter::once(HgPathBuf::from_bytes(child)).collect(),
        )
    }

    // Directories probed in the two "uniform answer" scenarios below.
    let every_dir: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/subdir/z",
        b"dir/foo",
        b"dir/subdir/x",
        b"folder",
    ];

    // Two alwaysmatchers should function like a nevermatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(AlwaysMatcher),
    );
    for dir in every_dir.iter() {
        assert_eq!(visit(&matcher, dir), VisitChildrenSet::Empty);
    }

    // One always and one never should behave the same as an always
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(NeverMatcher),
    );
    for dir in every_dir.iter() {
        assert_eq!(visit(&matcher, dir), VisitChildrenSet::Recursive);
    }

    // Two include matchers, each made of a single pattern whose source
    // is `/repo`.
    let include = |syntax: PatternSyntax, pattern: &[u8]| {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new("/repo"),
            )])
            .unwrap(),
        )
    };
    let matcher = DifferenceMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir"),
        include(PatternSyntax::RootFilesIn, b"dir"),
    );

    assert_eq!(visit(&matcher, b""), only(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), only(b"subdir"));
    assert_eq!(visit(&matcher, b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::This);
}
2134 2136
2135 2137 mod invariants {
2136 2138 pub mod visit_children_set {
2137 2139
2138 2140 use crate::{
2139 2141 matchers::{tests::Tree, Matcher, VisitChildrenSet},
2140 2142 utils::hg_path::HgPath,
2141 2143 };
2142 2144
2143 2145 #[allow(dead_code)]
2144 2146 #[derive(Debug)]
2145 2147 struct Error<'a, M> {
2146 2148 matcher: &'a M,
2147 2149 path: &'a HgPath,
2148 2150 matching: &'a Tree,
2149 2151 visit_children_set: &'a VisitChildrenSet,
2150 2152 }
2151 2153
2152 2154 fn holds(
2153 2155 matching: &Tree,
2154 2156 not_matching: &Tree,
2155 2157 vcs: &VisitChildrenSet,
2156 2158 ) -> bool {
2157 2159 match vcs {
2158 2160 VisitChildrenSet::Empty => matching.is_empty(),
2159 2161 VisitChildrenSet::This => {
2160 2162 // `This` does not come with any obligations.
2161 2163 true
2162 2164 }
2163 2165 VisitChildrenSet::Recursive => {
2164 2166 // `Recursive` requires that *everything* in the
2165 2167 // subtree matches. This
2166 2168 // requirement is relied on for example in
2167 2169 // DifferenceMatcher implementation.
2168 2170 not_matching.is_empty()
2169 2171 }
2170 2172 VisitChildrenSet::Set(allowed_children) => {
2171 2173 // `allowed_children` does not distinguish between
2172 2174 // files and directories: if it's not included, it
2173 2175 // must not be matched.
2174 2176 for k in matching.dirs.keys() {
2175 2177 if !(allowed_children.contains(k)) {
2176 2178 return false;
2177 2179 }
2178 2180 }
2179 2181 for k in matching.files.iter() {
2180 2182 if !(allowed_children.contains(k)) {
2181 2183 return false;
2182 2184 }
2183 2185 }
2184 2186 true
2185 2187 }
2186 2188 }
2187 2189 }
2188 2190
2189 2191 pub fn check<M: Matcher + std::fmt::Debug>(
2190 2192 matcher: &M,
2191 2193 path: &HgPath,
2192 2194 matching: &Tree,
2193 2195 not_matching: &Tree,
2194 2196 visit_children_set: &VisitChildrenSet,
2195 2197 ) {
2196 2198 if !holds(matching, not_matching, visit_children_set) {
2197 2199 panic!(
2198 2200 "{:#?}",
2199 2201 Error {
2200 2202 matcher,
2201 2203 path,
2202 2204 visit_children_set,
2203 2205 matching
2204 2206 }
2205 2207 )
2206 2208 }
2207 2209 }
2208 2210 }
2209 2211 }
2210 2212
2211 2213 #[derive(Debug, Clone)]
2212 2214 pub struct Tree {
2213 2215 files: BTreeSet<HgPathBuf>,
2214 2216 dirs: BTreeMap<HgPathBuf, Tree>,
2215 2217 }
2216 2218
2217 2219 impl Tree {
2218 2220 fn len(&self) -> usize {
2219 2221 let mut n = 0;
2220 2222 n += self.files.len();
2221 2223 for d in self.dirs.values() {
2222 2224 n += d.len();
2223 2225 }
2224 2226 n
2225 2227 }
2226 2228
2227 2229 fn is_empty(&self) -> bool {
2228 2230 self.files.is_empty() && self.dirs.is_empty()
2229 2231 }
2230 2232
2231 2233 fn make(
2232 2234 files: BTreeSet<HgPathBuf>,
2233 2235 dirs: BTreeMap<HgPathBuf, Tree>,
2234 2236 ) -> Self {
2235 2237 Self {
2236 2238 files,
2237 2239 dirs: dirs
2238 2240 .into_iter()
2239 2241 .filter(|(_k, v)| (!(v.is_empty())))
2240 2242 .collect(),
2241 2243 }
2242 2244 }
2243 2245
2244 2246 fn filter_and_check<M: Matcher + Debug>(
2245 2247 &self,
2246 2248 m: &M,
2247 2249 path: &HgPath,
2248 2250 ) -> (Self, Self) {
2249 2251 let (files1, files2): (BTreeSet<HgPathBuf>, BTreeSet<HgPathBuf>) =
2250 2252 self.files
2251 2253 .iter()
2252 2254 .map(|v| v.to_owned())
2253 2255 .partition(|v| m.matches(&path.join(v)));
2254 2256 let (dirs1, dirs2): (
2255 2257 BTreeMap<HgPathBuf, Tree>,
2256 2258 BTreeMap<HgPathBuf, Tree>,
2257 2259 ) = self
2258 2260 .dirs
2259 2261 .iter()
2260 2262 .map(|(k, v)| {
2261 2263 let path = path.join(k);
2262 2264 let (t1, t2) = v.filter_and_check(m, &path);
2263 2265 ((k.clone(), t1), (k.clone(), t2))
2264 2266 })
2265 2267 .unzip();
2266 2268 let matching = Self::make(files1, dirs1);
2267 2269 let not_matching = Self::make(files2, dirs2);
2268 2270 let vcs = m.visit_children_set(path);
2269 2271 invariants::visit_children_set::check(
2270 2272 m,
2271 2273 path,
2272 2274 &matching,
2273 2275 &not_matching,
2274 2276 &vcs,
2275 2277 );
2276 2278 (matching, not_matching)
2277 2279 }
2278 2280
2279 2281 fn check_matcher<M: Matcher + Debug>(
2280 2282 &self,
2281 2283 m: &M,
2282 2284 expect_count: usize,
2283 2285 ) {
2284 2286 let res = self.filter_and_check(m, &HgPathBuf::new());
2285 2287 if expect_count != res.0.len() {
2286 2288 eprintln!(
2287 2289 "warning: expected {} matches, got {} for {:#?}",
2288 2290 expect_count,
2289 2291 res.0.len(),
2290 2292 m
2291 2293 );
2292 2294 }
2293 2295 }
2294 2296 }
2295 2297
2296 2298 fn mkdir(children: &[(&[u8], &Tree)]) -> Tree {
2297 2299 let p = HgPathBuf::from_bytes;
2298 2300 let names = [
2299 2301 p(b"a"),
2300 2302 p(b"b.txt"),
2301 2303 p(b"file.txt"),
2302 2304 p(b"c.c"),
2303 2305 p(b"c.h"),
2304 2306 p(b"dir1"),
2305 2307 p(b"dir2"),
2306 2308 p(b"subdir"),
2307 2309 ];
2308 2310 let files: BTreeSet<HgPathBuf> = BTreeSet::from(names);
2309 2311 let dirs = children
2310 2312 .iter()
2311 2313 .map(|(name, t)| (p(name), (*t).clone()))
2312 2314 .collect();
2313 2315 Tree { files, dirs }
2314 2316 }
2315 2317
2316 2318 fn make_example_tree() -> Tree {
2317 2319 let leaf = mkdir(&[]);
2318 2320 let abc = mkdir(&[(b"d", &leaf)]);
2319 2321 let ab = mkdir(&[(b"c", &abc)]);
2320 2322 let a = mkdir(&[(b"b", &ab)]);
2321 2323 let dir = mkdir(&[(b"subdir", &leaf), (b"subdir.c", &leaf)]);
2322 2324 mkdir(&[(b"dir", &dir), (b"dir1", &dir), (b"dir2", &dir), (b"a", &a)])
2323 2325 }
2324 2326
#[test]
fn test_pattern_matcher_visit_children_set() {
    // A pattern with `Path::new("")` as its source.
    let pat = |syntax: PatternSyntax, bytes: &[u8]| {
        IgnorePattern::new(syntax, bytes, Path::new(""))
    };
    // A pattern matcher made of a single pattern.
    let pattern_matcher = |syntax: PatternSyntax, bytes: &[u8]| {
        PatternMatcher::new(vec![pat(syntax, bytes)]).unwrap()
    };
    // An include matcher made of the given patterns.
    let include_matcher = |patterns: Vec<IgnorePattern>| {
        IncludeMatcher::new(patterns).unwrap()
    };

    let tree = make_example_tree();

    let pattern_dir1_glob_c =
        pattern_matcher(PatternSyntax::Glob, b"dir1/*.c");
    // Closures: these two matchers are needed several times below, and
    // some of the uses take ownership of the matcher.
    let pattern_dir1 = || pattern_matcher(PatternSyntax::Path, b"dir1");
    let pattern_dir1_a = pattern_matcher(PatternSyntax::Glob, b"dir1/a");
    let pattern_relglob_c =
        || pattern_matcher(PatternSyntax::RelGlob, b"*.c");

    let file_dir_subdir_b =
        FileMatcher::new(vec![HgPathBuf::from_bytes(b"dir/subdir/b.txt")])
            .unwrap();
    let file_abcdfile = FileMatcher::new(vec![
        HgPathBuf::from_bytes(b"file.txt"),
        HgPathBuf::from_bytes(b"a/file.txt"),
        HgPathBuf::from_bytes(b"a/b/file.txt"),
        // No file in a/b/c
        HgPathBuf::from_bytes(b"a/b/c/d/file.txt"),
    ])
    .unwrap();

    let rootfilesin_dir =
        pattern_matcher(PatternSyntax::RootFilesIn, b"dir");
    let pattern_filepath_dir_subdir =
        pattern_matcher(PatternSyntax::FilePath, b"dir/subdir");

    let include_dir_subdir =
        include_matcher(vec![pat(PatternSyntax::RelPath, b"dir/subdir")]);

    let more_includematchers = [
        include_matcher(vec![pat(PatternSyntax::Glob, b"dir/s*")]),
        // Test multiple patterns
        include_matcher(vec![
            pat(PatternSyntax::RelPath, b"dir"),
            pat(PatternSyntax::Glob, b"s*"),
        ]),
        // Test a recursive glob
        include_matcher(vec![pat(PatternSyntax::Glob, b"**/*.c")]),
    ];

    // Each check validates the `visit_children_set` invariants over the
    // whole tree; the number is the expected count of matched files.
    tree.check_matcher(&pattern_dir1(), 25);
    tree.check_matcher(&pattern_dir1_a, 1);
    tree.check_matcher(&pattern_dir1_glob_c, 2);
    tree.check_matcher(&pattern_relglob_c(), 14);
    tree.check_matcher(&AlwaysMatcher, 112);
    tree.check_matcher(&NeverMatcher, 0);

    let intersection = IntersectionMatcher::new(
        Box::new(pattern_relglob_c()),
        Box::new(pattern_dir1()),
    );
    tree.check_matcher(&intersection, 3);

    let union = UnionMatcher::new(vec![
        Box::new(pattern_relglob_c()),
        Box::new(pattern_dir1()),
    ]);
    tree.check_matcher(&union, 36);

    let difference = DifferenceMatcher::new(
        Box::new(pattern_relglob_c()),
        Box::new(pattern_dir1()),
    );
    tree.check_matcher(&difference, 11);

    tree.check_matcher(&file_dir_subdir_b, 1);
    tree.check_matcher(&file_abcdfile, 4);
    tree.check_matcher(&rootfilesin_dir, 8);
    tree.check_matcher(&pattern_filepath_dir_subdir, 1);
    tree.check_matcher(&include_dir_subdir, 9);

    for (matcher, expect_count) in
        more_includematchers.iter().zip([17, 25, 35])
    {
        tree.check_matcher(matcher, expect_count);
    }
}
2453 2455 }
General Comments 0
You need to be logged in to leave comments. Login now