##// END OF EJS Templates
match: rename RootFiles to RootFilesIn for more consistency
Arseniy Alekseyev -
r52461:2a89d2f6 stable
parent child Browse files
Show More
@@ -1,874 +1,874
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::ops::Deref;
21 21 use std::path::{Path, PathBuf};
22 22 use std::vec::Vec;
23 23
24 24 lazy_static! {
25 25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 27 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
28 28 for byte in to_escape {
29 29 v[*byte as usize].insert(0, b'\\');
30 30 }
31 31 v
32 32 };
33 33 }
34 34
/// Regex replacements for what follows a `*` in a glob, as
/// `(bytes following the star, regex to emit)` pairs.
///
/// These are matched in order, so the empty entry (a lone `*`) must stay
/// last: it matches any remaining input.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38 38
39 39 #[derive(Debug, Clone, PartialEq, Eq)]
40 40 pub enum PatternSyntax {
41 41 /// A regular expression
42 42 Regexp,
43 43 /// Glob that matches at the front of the path
44 44 RootGlob,
45 45 /// Glob that matches at any suffix of the path (still anchored at
46 46 /// slashes)
47 47 Glob,
48 48 /// a path relative to repository root, which is matched recursively
49 49 Path,
50 50 /// a single exact path relative to repository root
51 51 FilePath,
52 52 /// A path relative to cwd
53 53 RelPath,
54 54 /// an unrooted glob (*.rs matches Rust files in all dirs)
55 55 RelGlob,
56 56 /// A regexp that needn't match the start of a name
57 57 RelRegexp,
58 58 /// A path relative to repository root, which is matched non-recursively
59 59 /// (will not match subdirectories)
60 RootFiles,
60 RootFilesIn,
61 61 /// A file of patterns to read and include
62 62 Include,
63 63 /// A file of patterns to match against files under the same directory
64 64 SubInclude,
65 65 /// SubInclude with the result of parsing the included file
66 66 ///
67 67 /// Note: there is no ExpandedInclude because that expansion can be done
68 68 /// in place by replacing the Include pattern by the included patterns.
69 69 /// SubInclude requires more handling.
70 70 ///
71 71 /// Note: `Box` is used to minimize size impact on other enum variants
72 72 ExpandedSubInclude(Box<SubInclude>),
73 73 }
74 74
75 75 /// Transforms a glob pattern into a regex
76 76 pub fn glob_to_re(pat: &[u8]) -> Vec<u8> {
77 77 let mut input = pat;
78 78 let mut res: Vec<u8> = vec![];
79 79 let mut group_depth = 0;
80 80
81 81 while let Some((c, rest)) = input.split_first() {
82 82 input = rest;
83 83
84 84 match c {
85 85 b'*' => {
86 86 for (source, repl) in GLOB_REPLACEMENTS {
87 87 if let Some(rest) = input.drop_prefix(source) {
88 88 input = rest;
89 89 res.extend(*repl);
90 90 break;
91 91 }
92 92 }
93 93 }
94 94 b'?' => res.extend(b"."),
95 95 b'[' => {
96 96 match input.iter().skip(1).position(|b| *b == b']') {
97 97 None => res.extend(b"\\["),
98 98 Some(end) => {
99 99 // Account for the one we skipped
100 100 let end = end + 1;
101 101
102 102 res.extend(b"[");
103 103
104 104 for (i, b) in input[..end].iter().enumerate() {
105 105 if *b == b'!' && i == 0 {
106 106 res.extend(b"^")
107 107 } else if *b == b'^' && i == 0 {
108 108 res.extend(b"\\^")
109 109 } else if *b == b'\\' {
110 110 res.extend(b"\\\\")
111 111 } else {
112 112 res.push(*b)
113 113 }
114 114 }
115 115 res.extend(b"]");
116 116 input = &input[end + 1..];
117 117 }
118 118 }
119 119 }
120 120 b'{' => {
121 121 group_depth += 1;
122 122 res.extend(b"(?:")
123 123 }
124 124 b'}' if group_depth > 0 => {
125 125 group_depth -= 1;
126 126 res.extend(b")");
127 127 }
128 128 b',' if group_depth > 0 => res.extend(b"|"),
129 129 b'\\' => {
130 130 let c = {
131 131 if let Some((c, rest)) = input.split_first() {
132 132 input = rest;
133 133 c
134 134 } else {
135 135 c
136 136 }
137 137 };
138 138 res.extend(&RE_ESCAPE[*c as usize])
139 139 }
140 140 _ => res.extend(&RE_ESCAPE[*c as usize]),
141 141 }
142 142 }
143 143 res
144 144 }
145 145
146 146 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 147 pattern
148 148 .iter()
149 149 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 150 .collect()
151 151 }
152 152
153 153 pub fn parse_pattern_syntax(
154 154 kind: &[u8],
155 155 ) -> Result<PatternSyntax, PatternError> {
156 156 match kind {
157 157 b"re:" => Ok(PatternSyntax::Regexp),
158 158 b"path:" => Ok(PatternSyntax::Path),
159 159 b"filepath:" => Ok(PatternSyntax::FilePath),
160 160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFilesIn),
162 162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 164 b"glob:" => Ok(PatternSyntax::Glob),
165 165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 166 b"include:" => Ok(PatternSyntax::Include),
167 167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 168 _ => Err(PatternError::UnsupportedSyntax(
169 169 String::from_utf8_lossy(kind).to_string(),
170 170 )),
171 171 }
172 172 }
173 173
174 174 lazy_static! {
175 175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
176 176 }
177 177
178 178 /// Builds the regex that corresponds to the given pattern.
179 179 /// If within a `syntax: regexp` context, returns the pattern,
180 180 /// otherwise, returns the corresponding regex.
181 181 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
182 182 let IgnorePattern {
183 183 syntax, pattern, ..
184 184 } = entry;
185 185 if pattern.is_empty() {
186 186 return vec![];
187 187 }
188 188 match syntax {
189 189 PatternSyntax::Regexp => pattern.to_owned(),
190 190 PatternSyntax::RelRegexp => {
191 191 // The `regex` crate accepts `**` while `re2` and Python's `re`
192 192 // do not. Checking for `*` correctly triggers the same error all
193 193 // engines.
194 194 if pattern[0] == b'^'
195 195 || pattern[0] == b'*'
196 196 || pattern.starts_with(b".*")
197 197 {
198 198 return pattern.to_owned();
199 199 }
200 200 match FLAG_RE.find(pattern) {
201 201 Some(mat) => {
202 202 let s = mat.start();
203 203 let e = mat.end();
204 204 [
205 205 &b"(?"[..],
206 206 &pattern[s + 2..e - 1],
207 207 &b":"[..],
208 208 if pattern[e] == b'^'
209 209 || pattern[e] == b'*'
210 210 || pattern[e..].starts_with(b".*")
211 211 {
212 212 &b""[..]
213 213 } else {
214 214 &b".*"[..]
215 215 },
216 216 &pattern[e..],
217 217 &b")"[..],
218 218 ]
219 219 .concat()
220 220 }
221 221 None => [&b".*"[..], pattern].concat(),
222 222 }
223 223 }
224 224 PatternSyntax::Path | PatternSyntax::RelPath => {
225 225 if pattern == b"." {
226 226 return vec![];
227 227 }
228 228 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
229 229 }
230 PatternSyntax::RootFiles => {
230 PatternSyntax::RootFilesIn => {
231 231 let mut res = if pattern == b"." {
232 232 vec![]
233 233 } else {
234 234 // Pattern is a directory name.
235 235 [escape_pattern(pattern).as_slice(), b"/"].concat()
236 236 };
237 237
238 238 // Anything after the pattern must be a non-directory.
239 239 res.extend(b"[^/]+$");
240 240 res
241 241 }
242 242 PatternSyntax::RelGlob => {
243 243 let glob_re = glob_to_re(pattern);
244 244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
245 245 [b".*", rest, glob_suffix].concat()
246 246 } else {
247 247 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
248 248 }
249 249 }
250 250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
251 251 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
252 252 }
253 253 PatternSyntax::Include
254 254 | PatternSyntax::SubInclude
255 255 | PatternSyntax::ExpandedSubInclude(_)
256 256 | PatternSyntax::FilePath => unreachable!(),
257 257 }
258 258 }
259 259
/// Bytes that have a special meaning in a glob. A pattern containing none of
/// them can be matched literally.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
262 262
/// Normalizes a `/`-separated path given as bytes: collapses repeated
/// slashes, drops `.` components and resolves `..` against the preceding
/// component where possible.
///
/// An empty input, or one that normalizes to nothing, yields `b"."`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';

    if bytes.is_empty() {
        return b".".to_vec();
    }

    let leading = bytes.iter().take_while(|b| **b == SEP).count();
    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let initial_slashes = if leading > 2 { 1 } else { leading };

    let mut components: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == SEP) {
        if component.is_empty() || component == b"." {
            continue;
        }
        // A `..` is kept only when there is nothing it could cancel: at the
        // start of a relative path, or right after another kept `..`.
        let keep = component != b".."
            || (initial_slashes == 0 && components.is_empty())
            || components.last().map_or(false, |last| *last == b"..");
        if keep {
            components.push(component);
        } else {
            // No-op when empty, i.e. `..` directly under the root.
            components.pop();
        }
    }

    let mut normalized = vec![SEP; initial_slashes];
    normalized.extend(components.join(&SEP));

    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}
304 304
305 305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
306 306 /// that don't need to be transformed into a regex.
307 307 pub fn build_single_regex(
308 308 entry: &IgnorePattern,
309 309 glob_suffix: &[u8],
310 310 ) -> Result<Option<Vec<u8>>, PatternError> {
311 311 let IgnorePattern {
312 312 pattern, syntax, ..
313 313 } = entry;
314 314 let pattern = match syntax {
315 315 PatternSyntax::RootGlob
316 316 | PatternSyntax::Path
317 317 | PatternSyntax::RelGlob
318 318 | PatternSyntax::RelPath
319 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
319 | PatternSyntax::RootFilesIn => normalize_path_bytes(pattern),
320 320 PatternSyntax::Include | PatternSyntax::SubInclude => {
321 321 return Err(PatternError::NonRegexPattern(entry.clone()))
322 322 }
323 323 _ => pattern.to_owned(),
324 324 };
325 325 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
326 326 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
327 327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
328 328 Ok(None)
329 329 } else {
330 330 let mut entry = entry.clone();
331 331 entry.pattern = pattern;
332 332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
333 333 }
334 334 }
335 335
336 336 lazy_static! {
337 337 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
338 338 let mut m = FastHashMap::default();
339 339
340 340 m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
341 341 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
342 342 m.insert(b"path:".as_ref(), PatternSyntax::Path);
343 343 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
344 344 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
345 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles);
345 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFilesIn);
346 346 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
347 347 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
348 348 m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
349 349 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
350 350 m.insert(b"include:".as_ref(), PatternSyntax::Include);
351 351 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);
352 352
353 353 m
354 354 };
355 355 }
356 356
/// Non-fatal problems encountered while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path
    NoSuchFile(PathBuf),
}
364 364
365 365 pub fn parse_one_pattern(
366 366 pattern: &[u8],
367 367 source: &Path,
368 368 default: PatternSyntax,
369 369 normalize: bool,
370 370 ) -> IgnorePattern {
371 371 let mut pattern_bytes: &[u8] = pattern;
372 372 let mut syntax = default;
373 373
374 374 for (s, val) in SYNTAXES.iter() {
375 375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
376 376 syntax = val.clone();
377 377 pattern_bytes = rest;
378 378 break;
379 379 }
380 380 }
381 381
382 382 let pattern = match syntax {
383 383 PatternSyntax::RootGlob
384 384 | PatternSyntax::Path
385 385 | PatternSyntax::Glob
386 386 | PatternSyntax::RelGlob
387 387 | PatternSyntax::RelPath
388 | PatternSyntax::RootFiles
388 | PatternSyntax::RootFilesIn
389 389 if normalize =>
390 390 {
391 391 normalize_path_bytes(pattern_bytes)
392 392 }
393 393 _ => pattern_bytes.to_vec(),
394 394 };
395 395
396 396 IgnorePattern {
397 397 syntax,
398 398 pattern,
399 399 source: source.to_owned(),
400 400 }
401 401 }
402 402
403 403 pub fn parse_pattern_file_contents(
404 404 lines: &[u8],
405 405 file_path: &Path,
406 406 default_syntax_override: Option<PatternSyntax>,
407 407 warn: bool,
408 408 relativize: bool,
409 409 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
410 410 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
411 411
412 412 #[allow(clippy::trivial_regex)]
413 413 let comment_escape_regex = Regex::new(r"\\#").unwrap();
414 414 let mut inputs: Vec<IgnorePattern> = vec![];
415 415 let mut warnings: Vec<PatternFileWarning> = vec![];
416 416
417 417 let mut current_syntax =
418 418 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
419 419
420 420 for mut line in lines.split(|c| *c == b'\n') {
421 421 let line_buf;
422 422 if line.contains(&b'#') {
423 423 if let Some(cap) = comment_regex.captures(line) {
424 424 line = &line[..cap.get(1).unwrap().end()]
425 425 }
426 426 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
427 427 line = &line_buf;
428 428 }
429 429
430 430 let line = line.trim_end();
431 431
432 432 if line.is_empty() {
433 433 continue;
434 434 }
435 435
436 436 if let Some(syntax) = line.drop_prefix(b"syntax:") {
437 437 let syntax = syntax.trim();
438 438
439 439 if let Some(parsed) =
440 440 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
441 441 {
442 442 current_syntax = parsed.clone();
443 443 } else if warn {
444 444 warnings.push(PatternFileWarning::InvalidSyntax(
445 445 file_path.to_owned(),
446 446 syntax.to_owned(),
447 447 ));
448 448 }
449 449 } else {
450 450 let pattern = parse_one_pattern(
451 451 line,
452 452 file_path,
453 453 current_syntax.clone(),
454 454 false,
455 455 );
456 456 inputs.push(if relativize {
457 457 pattern.to_relative()
458 458 } else {
459 459 pattern
460 460 })
461 461 }
462 462 }
463 463 Ok((inputs, warnings))
464 464 }
465 465
466 466 pub fn parse_pattern_args(
467 467 patterns: Vec<Vec<u8>>,
468 468 cwd: &Path,
469 469 root: &Path,
470 470 ) -> Result<Vec<IgnorePattern>, HgPathError> {
471 471 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
472 472 for pattern in patterns {
473 473 let pattern = parse_one_pattern(
474 474 &pattern,
475 475 Path::new("<args>"),
476 476 PatternSyntax::RelPath,
477 477 true,
478 478 );
479 479 match pattern.syntax {
480 480 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
481 481 let name = get_path_from_bytes(&pattern.pattern);
482 482 let canon = canonical_path(root, cwd, name)?;
483 483 ignore_patterns.push(IgnorePattern {
484 484 syntax: pattern.syntax,
485 485 pattern: get_bytes_from_path(canon),
486 486 source: pattern.source,
487 487 })
488 488 }
489 489 _ => ignore_patterns.push(pattern.to_owned()),
490 490 };
491 491 }
492 492 Ok(ignore_patterns)
493 493 }
494 494
495 495 pub fn read_pattern_file(
496 496 file_path: &Path,
497 497 warn: bool,
498 498 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
499 499 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
500 500 match std::fs::read(file_path) {
501 501 Ok(contents) => {
502 502 inspect_pattern_bytes(file_path, &contents);
503 503 parse_pattern_file_contents(&contents, file_path, None, warn, true)
504 504 }
505 505 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
506 506 vec![],
507 507 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
508 508 )),
509 509 Err(e) => Err(e.into()),
510 510 }
511 511 }
512 512
513 513 /// Represents an entry in an "ignore" file.
514 514 #[derive(Debug, Eq, PartialEq, Clone)]
515 515 pub struct IgnorePattern {
516 516 pub syntax: PatternSyntax,
517 517 pub pattern: Vec<u8>,
518 518 pub source: PathBuf,
519 519 }
520 520
521 521 impl IgnorePattern {
522 522 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
523 523 Self {
524 524 syntax,
525 525 pattern: pattern.to_owned(),
526 526 source: source.to_owned(),
527 527 }
528 528 }
529 529
530 530 pub fn to_relative(self) -> Self {
531 531 let Self {
532 532 syntax,
533 533 pattern,
534 534 source,
535 535 } = self;
536 536 Self {
537 537 syntax: match syntax {
538 538 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
539 539 PatternSyntax::Glob => PatternSyntax::RelGlob,
540 540 x => x,
541 541 },
542 542 pattern,
543 543 source,
544 544 }
545 545 }
546 546 }
547 547
548 548 pub type PatternResult<T> = Result<T, PatternError>;
549 549
550 550 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
551 551 /// and `subinclude:` patterns.
552 552 ///
553 553 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
554 554 /// is used for the latter to form a tree of patterns.
555 555 pub fn get_patterns_from_file(
556 556 pattern_file: &Path,
557 557 root_dir: &Path,
558 558 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
559 559 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
560 560 let (patterns, mut warnings) =
561 561 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
562 562 let patterns = patterns
563 563 .into_iter()
564 564 .flat_map(|entry| -> PatternResult<_> {
565 565 Ok(match &entry.syntax {
566 566 PatternSyntax::Include => {
567 567 let inner_include =
568 568 root_dir.join(get_path_from_bytes(&entry.pattern));
569 569 let (inner_pats, inner_warnings) = get_patterns_from_file(
570 570 &inner_include,
571 571 root_dir,
572 572 inspect_pattern_bytes,
573 573 )?;
574 574 warnings.extend(inner_warnings);
575 575 inner_pats
576 576 }
577 577 PatternSyntax::SubInclude => {
578 578 let mut sub_include = SubInclude::new(
579 579 root_dir,
580 580 &entry.pattern,
581 581 &entry.source,
582 582 )?;
583 583 let (inner_patterns, inner_warnings) =
584 584 get_patterns_from_file(
585 585 &sub_include.path,
586 586 &sub_include.root,
587 587 inspect_pattern_bytes,
588 588 )?;
589 589 sub_include.included_patterns = inner_patterns;
590 590 warnings.extend(inner_warnings);
591 591 vec![IgnorePattern {
592 592 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
593 593 sub_include,
594 594 )),
595 595 ..entry
596 596 }]
597 597 }
598 598 _ => vec![entry],
599 599 })
600 600 })
601 601 .flatten()
602 602 .collect();
603 603
604 604 Ok((patterns, warnings))
605 605 }
606 606
607 607 /// Holds all the information needed to handle a `subinclude:` pattern.
608 608 #[derive(Debug, PartialEq, Eq, Clone)]
609 609 pub struct SubInclude {
610 610 /// Will be used for repository (hg) paths that start with this prefix.
611 611 /// It is relative to the current working directory, so comparing against
612 612 /// repository paths is painless.
613 613 pub prefix: HgPathBuf,
614 614 /// The file itself, containing the patterns
615 615 pub path: PathBuf,
616 616 /// Folder in the filesystem where this it applies
617 617 pub root: PathBuf,
618 618
619 619 pub included_patterns: Vec<IgnorePattern>,
620 620 }
621 621
622 622 impl SubInclude {
623 623 pub fn new(
624 624 root_dir: &Path,
625 625 pattern: &[u8],
626 626 source: &Path,
627 627 ) -> Result<SubInclude, HgPathError> {
628 628 let normalized_source =
629 629 normalize_path_bytes(&get_bytes_from_path(source));
630 630
631 631 let source_root = get_path_from_bytes(&normalized_source);
632 632 let source_root = source_root.parent().unwrap_or(source_root);
633 633
634 634 let path = source_root.join(get_path_from_bytes(pattern));
635 635 let new_root = path.parent().unwrap_or_else(|| path.deref());
636 636
637 637 let prefix = canonical_path(root_dir, root_dir, new_root)?;
638 638
639 639 Ok(Self {
640 640 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
641 641 if !p.is_empty() {
642 642 p.push_byte(b'/');
643 643 }
644 644 p
645 645 })?,
646 646 path: path.to_owned(),
647 647 root: new_root.to_owned(),
648 648 included_patterns: Vec::new(),
649 649 })
650 650 }
651 651 }
652 652
653 653 /// Separate and pre-process subincludes from other patterns for the "ignore"
654 654 /// phase.
655 655 pub fn filter_subincludes(
656 656 ignore_patterns: Vec<IgnorePattern>,
657 657 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
658 658 let mut subincludes = vec![];
659 659 let mut others = vec![];
660 660
661 661 for pattern in ignore_patterns {
662 662 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
663 663 {
664 664 subincludes.push(*sub_include);
665 665 } else {
666 666 others.push(pattern)
667 667 }
668 668 }
669 669 Ok((subincludes, others))
670 670 }
671 671
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Bytes outside the escape set pass through; escaped ones gain a
    /// backslash.
    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
            br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
        );
    }

    /// Translation of each glob construct to its regex form.
    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br"?"), br".");
        assert_eq!(glob_to_re(br"*"), br"[^/]*");
        assert_eq!(glob_to_re(br"**"), br".*");
        assert_eq!(glob_to_re(br"**/a"), br"(?:.*/)?a");
        assert_eq!(glob_to_re(br"a/**/b"), br"a/(?:.*/)?b");
        assert_eq!(glob_to_re(br"[a*?!^][^b][!c]"), br"[a*?!^][\^b][^c]");
        assert_eq!(glob_to_re(br"{a,b}"), br"(?:a|b)");
        assert_eq!(glob_to_re(br".\*\?"), br"\.\*\?");
    }

    /// `syntax:` lines, inline prefixes and relativization in pattern files.
    #[test]
    fn test_parse_pattern_file_contents() {
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(
                lines,
                Path::new("file_path"),
                None,
                false,
                true,
            )
            .unwrap()
            .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(
                lines,
                Path::new("file_path"),
                None,
                false,
                true,
            )
            .unwrap()
            .0,
            vec![]
        );
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(
                lines,
                Path::new("file_path"),
                None,
                false,
                true,
            )
            .unwrap()
            .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    /// Regex output for a relative glob and for a plain regexp.
    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RelGlob,
                    b"rust/target/",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::Regexp,
                    br"rust/target/\d+",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    /// Root globs without special characters need no regex.
    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RootGlob,
                    b"",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RootGlob,
                    b"whatever",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RootGlob,
                    b"*.o",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    /// Unanchoring of relative regexps, with and without inline flags.
    #[test]
    fn test_build_single_relregex() {
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RelRegexp,
                    b"^ba{2}r",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(b"^ba{2}r".to_vec()),
        );
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RelRegexp,
                    b"ba{2}r",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(b".*ba{2}r".to_vec()),
        );
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RelRegexp,
                    b"(?ia)ba{2}r",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
        assert_eq!(
            build_single_regex(
                &IgnorePattern::new(
                    PatternSyntax::RelRegexp,
                    b"(?ia)^ba{2}r",
                    Path::new("")
                ),
                b"(?:/|$)"
            )
            .unwrap(),
            Some(b"(?ia:^ba{2}r)".to_vec()),
        );
    }
}
@@ -1,2434 +1,2434
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 use format_bytes::format_bytes;
11 11 use once_cell::sync::OnceCell;
12 12
13 13 use crate::{
14 14 dirstate::dirs_multiset::DirsChildrenMultiset,
15 15 filepatterns::{
16 16 build_single_regex, filter_subincludes, get_patterns_from_file,
17 17 PatternFileWarning, PatternResult,
18 18 },
19 19 utils::{
20 20 files::{dir_ancestors, find_dirs},
21 21 hg_path::{HgPath, HgPathBuf, HgPathError},
22 22 Escaped,
23 23 },
24 24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
25 25 };
26 26
27 27 use crate::dirstate::status::IgnoreFnType;
28 28 use crate::filepatterns::normalize_path_bytes;
29 29 use std::collections::HashSet;
30 30 use std::fmt::{Display, Error, Formatter};
31 31 use std::path::{Path, PathBuf};
32 32 use std::{borrow::ToOwned, collections::BTreeSet};
33 33
/// Answer returned by [`Matcher::visit_children_set`]: which entries of a
/// directory are worth visiting.
#[derive(Debug, PartialEq)]
pub enum VisitChildrenSet {
    /// Don't visit anything
    Empty,
    /// Visit this directory and probably its children
    This,
    /// Only visit the children (both files and directories) if they
    /// are mentioned in this set. (An empty set corresponds to
    /// [`VisitChildrenSet::Empty`].)
    /// TODO Should we implement a `NonEmptyHashSet`?
    Set(HashSet<HgPathBuf>),
    /// Visit this directory and all subdirectories
    /// (you can stop asking about the children set)
    Recursive,
}
48 48
/// Common interface of everything that can decide whether a path is matched
/// and which directories are worth visiting.
pub trait Matcher: core::fmt::Debug {
    /// Explicitly listed files
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
    /// Returns whether `filename` is in `file_set`
    fn exact_match(&self, filename: &HgPath) -> bool;
    /// Returns whether `filename` is matched by this matcher
    fn matches(&self, filename: &HgPath) -> bool;
    /// Decides whether a directory should be visited based on whether it
    /// has potential matches in it or one of its subdirectories, and
    /// potentially lists which subdirectories of that directory should be
    /// visited. This is based on the match's primary, included, and excluded
    /// patterns.
    ///
    /// # Example
    ///
    /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
    /// return the following values (assuming the implementation of
    /// visit_children_set is capable of recognizing this; some implementations
    /// are not).
    ///
    /// ```text
    /// ''          -> {'foo', 'qux'}
    /// 'baz'       -> set()
    /// 'foo'       -> {'bar'}
    /// // Ideally this would be `Recursive`, but since the prefix nature of
    /// // matchers is applied to the entire matcher, we have to downgrade this
    /// // to `This` due to the (yet to be implemented in Rust) non-prefix
    /// // `RootFilesIn`-kind matcher being mixed in.
    /// 'foo/bar'   -> 'this'
    /// 'qux'       -> 'this'
    /// ```
    ///
    /// # Important
    ///
    /// Most matchers do not know if they're representing files or
    /// directories. They see `['path:dir/f']` and don't know whether `f` is a
    /// file or a directory, so `visit_children_set('dir')` for most matchers
    /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
    /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
    /// it may return `VisitChildrenSet::This`.
    /// Do not rely on the return being a `HashSet` indicating that there are
    /// no files in this dir to investigate (or equivalently that if there are
    /// files to investigate in 'dir' that it will always return
    /// `VisitChildrenSet::This`).
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
    /// Matcher will match everything and `file_set()` will be empty:
    /// optimization might be possible.
    fn matches_everything(&self) -> bool;
    /// Matcher will match exactly the files in `file_set()`: optimization
    /// might be possible.
    fn is_exact(&self) -> bool;
}
101 101
/// Matches everything.
/// ```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;

impl Matcher for AlwaysMatcher {
    /// There is no explicit file list.
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
        None
    }
    /// Nothing is listed explicitly, so nothing is an exact match.
    fn exact_match(&self, _filename: &HgPath) -> bool {
        false
    }
    /// Every path matches.
    fn matches(&self, _filename: &HgPath) -> bool {
        true
    }
    /// Every directory is to be visited, along with all its subdirectories.
    fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
        VisitChildrenSet::Recursive
    }
    fn matches_everything(&self) -> bool {
        true
    }
    fn is_exact(&self) -> bool {
        false
    }
}
136 136
/// Matches nothing.
#[derive(Debug)]
pub struct NeverMatcher;

impl Matcher for NeverMatcher {
    /// There is no explicit file list.
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
        None
    }
    fn exact_match(&self, _filename: &HgPath) -> bool {
        false
    }
    /// No path ever matches.
    fn matches(&self, _filename: &HgPath) -> bool {
        false
    }
    /// No directory needs to be visited.
    fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
        VisitChildrenSet::Empty
    }
    fn matches_everything(&self) -> bool {
        false
    }
    /// Reported as exact even though `file_set()` is `None`.
    fn is_exact(&self) -> bool {
        true
    }
}
161 161
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
///```
/// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
///
/// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
/// let matcher = FileMatcher::new(files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher {
    /// The exact set of paths this matcher accepts.
    files: HashSet<HgPathBuf>,
    /// Directory multiset built from `files` with
    /// `DirsMultiset::from_manifest`.
    dirs: DirsMultiset,
    /// Lazily computed, sorted union of `dirs` and `files` (without the
    /// empty root path), used by `visit_children_set` for non-root
    /// directories.
    sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
}
182 182
183 183 impl FileMatcher {
184 184 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
185 185 let dirs = DirsMultiset::from_manifest(&files)?;
186 186 Ok(Self {
187 187 files: HashSet::from_iter(files),
188 188 dirs,
189 189 sorted_visitchildrenset_candidates: OnceCell::new(),
190 190 })
191 191 }
192 192 fn inner_matches(&self, filename: &HgPath) -> bool {
193 193 self.files.contains(filename.as_ref())
194 194 }
195 195 }
196 196
197 197 impl Matcher for FileMatcher {
198 198 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
199 199 Some(&self.files)
200 200 }
201 201 fn exact_match(&self, filename: &HgPath) -> bool {
202 202 self.inner_matches(filename)
203 203 }
204 204 fn matches(&self, filename: &HgPath) -> bool {
205 205 self.inner_matches(filename)
206 206 }
207 207 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
208 208 if self.files.is_empty() || !self.dirs.contains(directory) {
209 209 return VisitChildrenSet::Empty;
210 210 }
211 211
212 212 let compute_candidates = || -> BTreeSet<HgPathBuf> {
213 213 let mut candidates: BTreeSet<HgPathBuf> =
214 214 self.dirs.iter().cloned().collect();
215 215 candidates.extend(self.files.iter().cloned());
216 216 candidates.remove(HgPath::new(b""));
217 217 candidates
218 218 };
219 219 let candidates =
220 220 if directory.as_ref().is_empty() {
221 221 compute_candidates()
222 222 } else {
223 223 let sorted_candidates = self
224 224 .sorted_visitchildrenset_candidates
225 225 .get_or_init(compute_candidates);
226 226 let directory_bytes = directory.as_ref().as_bytes();
227 227 let start: HgPathBuf =
228 228 format_bytes!(b"{}/", directory_bytes).into();
229 229 let start_len = start.len();
230 230 // `0` sorts after `/`
231 231 let end = format_bytes!(b"{}0", directory_bytes).into();
232 232 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
233 233 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
234 234 ))
235 235 };
236 236
237 237 // `self.dirs` includes all of the directories, recursively, so if
238 238 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
239 239 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
240 240 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
241 241 // subdir will be in there without a slash.
242 242 VisitChildrenSet::Set(
243 243 candidates
244 244 .into_iter()
245 245 .filter_map(|c| {
246 246 if c.bytes().all(|b| *b != b'/') {
247 247 Some(c)
248 248 } else {
249 249 None
250 250 }
251 251 })
252 252 .collect(),
253 253 )
254 254 }
255 255 fn matches_everything(&self) -> bool {
256 256 false
257 257 }
258 258 fn is_exact(&self) -> bool {
259 259 true
260 260 }
261 261 }
262 262
/// Matches a set of (kind, pat, source) against a 'root' directory.
/// (Currently the 'root' directory is effectively always empty)
/// ```
/// use hg::{
///     matchers::{PatternMatcher, Matcher},
///     IgnorePattern,
///     PatternSyntax,
///     utils::hg_path::{HgPath, HgPathBuf}
/// };
/// use std::collections::HashSet;
/// use std::path::Path;
/// ///
/// let ignore_patterns : Vec<IgnorePattern> =
///     vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")),
///          IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")),
///          IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")),
///          IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")),
///     ];
/// let matcher = PatternMatcher::new(ignore_patterns).unwrap();
/// ///
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a
/// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo'
/// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo'
/// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h
/// assert_eq!(matcher.file_set().unwrap(),
///            &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"),
///                            HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")]));
/// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true);
/// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true);
/// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds
/// ```
pub struct PatternMatcher<'a> {
    /// Pattern bytes returned by `build_match`; only used for `Debug` output
    /// in this type.
    patterns: Vec<u8>,
    /// Matching function returned by `build_match`.
    match_fn: IgnoreFnType<'a>,
    /// Whether all the patterns match a prefix (i.e. recursively)
    prefix: bool,
    /// Roots extracted from the patterns by `roots_and_dirs`.
    files: HashSet<HgPathBuf>,
    /// Directory multiset built from the same roots as `files`.
    dirs: DirsMultiset,
}
304 304
/// Manual `Debug` implementation: `match_fn` is left out of the output and
/// `patterns` is shown as lossily decoded UTF-8.
impl core::fmt::Debug for PatternMatcher<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PatternMatcher")
            .field("patterns", &String::from_utf8_lossy(&self.patterns))
            .field("prefix", &self.prefix)
            .field("files", &self.files)
            .field("dirs", &self.dirs)
            .finish()
    }
}
315 315
316 316 impl<'a> PatternMatcher<'a> {
317 317 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
318 318 let (files, _) = roots_and_dirs(&ignore_patterns);
319 319 let dirs = DirsMultiset::from_manifest(&files)?;
320 320 let files: HashSet<HgPathBuf> = HashSet::from_iter(files);
321 321
322 322 let prefix = ignore_patterns.iter().all(|k| {
323 323 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
324 324 });
325 325 let (patterns, match_fn) = build_match(ignore_patterns, b"$")?;
326 326
327 327 Ok(Self {
328 328 patterns,
329 329 match_fn,
330 330 prefix,
331 331 files,
332 332 dirs,
333 333 })
334 334 }
335 335 }
336 336
337 337 impl<'a> Matcher for PatternMatcher<'a> {
338 338 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
339 339 Some(&self.files)
340 340 }
341 341
342 342 fn exact_match(&self, filename: &HgPath) -> bool {
343 343 self.files.contains(filename)
344 344 }
345 345
346 346 fn matches(&self, filename: &HgPath) -> bool {
347 347 if self.files.contains(filename) {
348 348 return true;
349 349 }
350 350 (self.match_fn)(filename)
351 351 }
352 352
353 353 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
354 354 if self.prefix && self.files.contains(directory) {
355 355 return VisitChildrenSet::Recursive;
356 356 }
357 357 if self.dirs.contains(directory) {
358 358 return VisitChildrenSet::This;
359 359 }
360 360 if dir_ancestors(directory)
361 361 .any(|parent_dir| self.files.contains(parent_dir))
362 362 {
363 363 VisitChildrenSet::This
364 364 } else {
365 365 VisitChildrenSet::Empty
366 366 }
367 367 }
368 368
369 369 fn matches_everything(&self) -> bool {
370 370 false
371 371 }
372 372
373 373 fn is_exact(&self) -> bool {
374 374 false
375 375 }
376 376 }
377 377
/// Matches files that are included in the ignore rules.
/// ```
/// use hg::{
///     matchers::{IncludeMatcher, Matcher},
///     IgnorePattern,
///     PatternSyntax,
///     utils::hg_path::HgPath
/// };
/// use std::path::Path;
/// ///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
/// ///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootFilesIn, b"dir/subdir", Path::new(""))];
/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
/// ///
/// assert!(!matcher.matches(HgPath::new(b"file")));
/// assert!(!matcher.matches(HgPath::new(b"dir/file")));
/// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
/// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
/// ```
pub struct IncludeMatcher<'a> {
    /// Pattern bytes; only used for `Debug` output in this type.
    patterns: Vec<u8>,
    /// Function deciding whether a path matches.
    match_fn: IgnoreFnType<'a>,
    /// Whether all the patterns match a prefix (i.e. recursively)
    prefix: bool,
    // NOTE(review): the three sets below presumably mirror
    // `RootsDirsAndParents` (recursive roots, non-recursive dirs, implied
    // parent directories) -- confirm against `IncludeMatcher::new`.
    roots: HashSet<HgPathBuf>,
    dirs: HashSet<HgPathBuf>,
    parents: HashSet<HgPathBuf>,
}
415 415
/// Manual `Debug` implementation: `match_fn` is left out of the output and
/// `patterns` is shown as lossily decoded UTF-8.
impl core::fmt::Debug for IncludeMatcher<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("IncludeMatcher")
            .field("patterns", &String::from_utf8_lossy(&self.patterns))
            .field("prefix", &self.prefix)
            .field("roots", &self.roots)
            .field("dirs", &self.dirs)
            .field("parents", &self.parents)
            .finish()
    }
}
427 427
428 428 impl<'a> Matcher for IncludeMatcher<'a> {
429 429 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
430 430 None
431 431 }
432 432
433 433 fn exact_match(&self, _filename: &HgPath) -> bool {
434 434 false
435 435 }
436 436
437 437 fn matches(&self, filename: &HgPath) -> bool {
438 438 (self.match_fn)(filename)
439 439 }
440 440
441 441 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
442 442 let dir = directory;
443 443 if self.prefix && self.roots.contains(dir) {
444 444 return VisitChildrenSet::Recursive;
445 445 }
446 446 if self.roots.contains(HgPath::new(b""))
447 447 || self.roots.contains(dir)
448 448 || self.dirs.contains(dir)
449 449 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
450 450 {
451 451 return VisitChildrenSet::This;
452 452 }
453 453
454 454 if self.parents.contains(dir.as_ref()) {
455 455 let multiset = self.get_all_parents_children();
456 456 if let Some(children) = multiset.get(dir) {
457 457 return VisitChildrenSet::Set(
458 458 children.iter().map(HgPathBuf::from).collect(),
459 459 );
460 460 }
461 461 }
462 462 VisitChildrenSet::Empty
463 463 }
464 464
465 465 fn matches_everything(&self) -> bool {
466 466 false
467 467 }
468 468
469 469 fn is_exact(&self) -> bool {
470 470 false
471 471 }
472 472 }
473 473
/// The union of multiple matchers. Will match if any of the matchers match.
#[derive(Debug)]
pub struct UnionMatcher {
    /// The matchers being combined; they are consulted in this order.
    matchers: Vec<Box<dyn Matcher + Sync>>,
}
479 479
480 480 impl Matcher for UnionMatcher {
481 481 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
482 482 None
483 483 }
484 484
485 485 fn exact_match(&self, _filename: &HgPath) -> bool {
486 486 false
487 487 }
488 488
489 489 fn matches(&self, filename: &HgPath) -> bool {
490 490 self.matchers.iter().any(|m| m.matches(filename))
491 491 }
492 492
493 493 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
494 494 let mut result = HashSet::new();
495 495 let mut this = false;
496 496 for matcher in self.matchers.iter() {
497 497 let visit = matcher.visit_children_set(directory);
498 498 match visit {
499 499 VisitChildrenSet::Empty => continue,
500 500 VisitChildrenSet::This => {
501 501 this = true;
502 502 // Don't break, we might have an 'all' in here.
503 503 continue;
504 504 }
505 505 VisitChildrenSet::Set(set) => {
506 506 result.extend(set);
507 507 }
508 508 VisitChildrenSet::Recursive => {
509 509 return visit;
510 510 }
511 511 }
512 512 }
513 513 if this {
514 514 return VisitChildrenSet::This;
515 515 }
516 516 if result.is_empty() {
517 517 VisitChildrenSet::Empty
518 518 } else {
519 519 VisitChildrenSet::Set(result)
520 520 }
521 521 }
522 522
523 523 fn matches_everything(&self) -> bool {
524 524 // TODO Maybe if all are AlwaysMatcher?
525 525 false
526 526 }
527 527
528 528 fn is_exact(&self) -> bool {
529 529 false
530 530 }
531 531 }
532 532
impl UnionMatcher {
    /// Creates the union of `matchers`, stored as given.
    pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
        Self { matchers }
    }
}
538 538
/// Matches the paths accepted by both `m1` and `m2`.
#[derive(Debug)]
pub struct IntersectionMatcher {
    m1: Box<dyn Matcher + Sync>,
    m2: Box<dyn Matcher + Sync>,
    /// Explicit file set computed once in `new`, if any.
    files: Option<HashSet<HgPathBuf>>,
}
545 545
546 546 impl Matcher for IntersectionMatcher {
547 547 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
548 548 self.files.as_ref()
549 549 }
550 550
551 551 fn exact_match(&self, filename: &HgPath) -> bool {
552 552 self.files.as_ref().map_or(false, |f| f.contains(filename))
553 553 }
554 554
555 555 fn matches(&self, filename: &HgPath) -> bool {
556 556 self.m1.matches(filename) && self.m2.matches(filename)
557 557 }
558 558
559 559 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
560 560 let m1_set = self.m1.visit_children_set(directory);
561 561 if m1_set == VisitChildrenSet::Empty {
562 562 return VisitChildrenSet::Empty;
563 563 }
564 564 let m2_set = self.m2.visit_children_set(directory);
565 565 if m2_set == VisitChildrenSet::Empty {
566 566 return VisitChildrenSet::Empty;
567 567 }
568 568
569 569 if m1_set == VisitChildrenSet::Recursive {
570 570 return m2_set;
571 571 } else if m2_set == VisitChildrenSet::Recursive {
572 572 return m1_set;
573 573 }
574 574
575 575 match (&m1_set, &m2_set) {
576 576 (VisitChildrenSet::Recursive, _) => m2_set,
577 577 (_, VisitChildrenSet::Recursive) => m1_set,
578 578 (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
579 579 VisitChildrenSet::This
580 580 }
581 581 (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
582 582 let set: HashSet<_> = m1.intersection(m2).cloned().collect();
583 583 if set.is_empty() {
584 584 VisitChildrenSet::Empty
585 585 } else {
586 586 VisitChildrenSet::Set(set)
587 587 }
588 588 }
589 589 _ => unreachable!(),
590 590 }
591 591 }
592 592
593 593 fn matches_everything(&self) -> bool {
594 594 self.m1.matches_everything() && self.m2.matches_everything()
595 595 }
596 596
597 597 fn is_exact(&self) -> bool {
598 598 self.m1.is_exact() || self.m2.is_exact()
599 599 }
600 600 }
601 601
602 602 impl IntersectionMatcher {
603 603 pub fn new(
604 604 mut m1: Box<dyn Matcher + Sync>,
605 605 mut m2: Box<dyn Matcher + Sync>,
606 606 ) -> Self {
607 607 let files = if m1.is_exact() || m2.is_exact() {
608 608 if !m1.is_exact() {
609 609 std::mem::swap(&mut m1, &mut m2);
610 610 }
611 611 m1.file_set().map(|m1_files| {
612 612 m1_files.iter().cloned().filter(|f| m2.matches(f)).collect()
613 613 })
614 614 } else {
615 615 // without exact input file sets, we can't do an exact
616 616 // intersection, so we must over-approximate by
617 617 // unioning instead
618 618 m1.file_set().map(|m1_files| match m2.file_set() {
619 619 Some(m2_files) => m1_files.union(m2_files).cloned().collect(),
620 620 None => m1_files.iter().cloned().collect(),
621 621 })
622 622 };
623 623 Self { m1, m2, files }
624 624 }
625 625 }
626 626
/// Matches the paths accepted by `base` but not by `excluded`.
#[derive(Debug)]
pub struct DifferenceMatcher {
    base: Box<dyn Matcher + Sync>,
    excluded: Box<dyn Matcher + Sync>,
    /// Explicit file set; only populated in `new` when `base` is exact.
    files: Option<HashSet<HgPathBuf>>,
}
633 633
634 634 impl Matcher for DifferenceMatcher {
635 635 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
636 636 self.files.as_ref()
637 637 }
638 638
639 639 fn exact_match(&self, filename: &HgPath) -> bool {
640 640 self.files.as_ref().map_or(false, |f| f.contains(filename))
641 641 }
642 642
643 643 fn matches(&self, filename: &HgPath) -> bool {
644 644 self.base.matches(filename) && !self.excluded.matches(filename)
645 645 }
646 646
647 647 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
648 648 let excluded_set = self.excluded.visit_children_set(directory);
649 649 if excluded_set == VisitChildrenSet::Recursive {
650 650 return VisitChildrenSet::Empty;
651 651 }
652 652 let base_set = self.base.visit_children_set(directory);
653 653 // Possible values for base: 'recursive', 'this', set(...), set()
654 654 // Possible values for excluded: 'this', set(...), set()
655 655 // If excluded has nothing under here that we care about, return base,
656 656 // even if it's 'recursive'.
657 657 if excluded_set == VisitChildrenSet::Empty {
658 658 return base_set;
659 659 }
660 660 match base_set {
661 661 VisitChildrenSet::This | VisitChildrenSet::Recursive => {
662 662 // Never return 'recursive' here if excluded_set is any kind of
663 663 // non-empty (either 'this' or set(foo)), since excluded might
664 664 // return set() for a subdirectory.
665 665 VisitChildrenSet::This
666 666 }
667 667 set => {
668 668 // Possible values for base: set(...), set()
669 669 // Possible values for excluded: 'this', set(...)
670 670 // We ignore excluded set results. They're possibly incorrect:
671 671 // base = path:dir/subdir
672 672 // excluded=rootfilesin:dir,
673 673 // visit_children_set(''):
674 674 // base returns {'dir'}, excluded returns {'dir'}, if we
675 675 // subtracted we'd return set(), which is *not* correct, we
676 676 // still need to visit 'dir'!
677 677 set
678 678 }
679 679 }
680 680 }
681 681
682 682 fn matches_everything(&self) -> bool {
683 683 false
684 684 }
685 685
686 686 fn is_exact(&self) -> bool {
687 687 self.base.is_exact()
688 688 }
689 689 }
690 690
691 691 impl DifferenceMatcher {
692 692 pub fn new(
693 693 base: Box<dyn Matcher + Sync>,
694 694 excluded: Box<dyn Matcher + Sync>,
695 695 ) -> Self {
696 696 let base_is_exact = base.is_exact();
697 697 let base_files = base.file_set().map(ToOwned::to_owned);
698 698 let mut new = Self {
699 699 base,
700 700 excluded,
701 701 files: None,
702 702 };
703 703 if base_is_exact {
704 704 new.files = base_files.map(|files| {
705 705 files.iter().cloned().filter(|f| new.matches(f)).collect()
706 706 });
707 707 }
708 708 new
709 709 }
710 710 }
711 711
/// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
/// contexts.
///
/// The `status` algorithm makes heavy use of threads, and calling `is_match`
/// from many threads at once is prone to contention, probably within the
/// scratch space needed as the regex DFA is built lazily.
///
/// We are in the process of raising the issue upstream, but for now
/// the workaround used here is to store the `Regex` in a lazily populated
/// thread-local variable, sharing the initial read-only compilation, but
/// not the lazy dfa scratch space mentioned above.
///
/// This reduces the contention observed with 16+ threads, but does not
/// completely remove it. Hopefully this can be addressed upstream.
struct RegexMatcher {
    /// Compiled at the start of the status algorithm, used as a base for
    /// cloning in each thread-local `self.local`, thus sharing the expensive
    /// first compilation.
    base: regex::bytes::Regex,
    /// Thread-local variable that holds the `Regex` that is actually queried
    /// from each thread.
    local: thread_local::ThreadLocal<regex::bytes::Regex>,
}
735 735
736 736 impl RegexMatcher {
737 737 /// Returns whether the path matches the stored `Regex`.
738 738 pub fn is_match(&self, path: &HgPath) -> bool {
739 739 self.local
740 740 .get_or(|| self.base.clone())
741 741 .is_match(path.as_bytes())
742 742 }
743 743 }
744 744
745 745 /// Return a `RegexBuilder` from a bytes pattern
746 746 ///
747 747 /// This works around the fact that even if it works on byte haysacks,
748 748 /// [`regex::bytes::Regex`] still uses UTF-8 patterns.
749 749 pub fn re_bytes_builder(pattern: &[u8]) -> regex::bytes::RegexBuilder {
750 750 use std::io::Write;
751 751
752 752 // The `regex` crate adds `.*` to the start and end of expressions if there
753 753 // are no anchors, so add the start anchor.
754 754 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
755 755 for byte in pattern {
756 756 if *byte > 127 {
757 757 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
758 758 } else {
759 759 escaped_bytes.push(*byte);
760 760 }
761 761 }
762 762 escaped_bytes.push(b')');
763 763
764 764 // Avoid the cost of UTF8 checking
765 765 //
766 766 // # Safety
767 767 // This is safe because we escaped all non-ASCII bytes.
768 768 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
769 769 regex::bytes::RegexBuilder::new(&pattern_string)
770 770 }
771 771
772 772 /// Returns a function that matches an `HgPath` against the given regex
773 773 /// pattern.
774 774 ///
775 775 /// This can fail when the pattern is invalid or not supported by the
776 776 /// underlying engine (the `regex` crate), for instance anything with
777 777 /// back-references.
778 778 #[logging_timer::time("trace")]
779 779 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
780 780 let re = re_bytes_builder(pattern)
781 781 .unicode(false)
782 782 // Big repos with big `.hgignore` will hit the default limit and
783 783 // incur a significant performance hit. One repo's `hg status` hit
784 784 // multiple *minutes*.
785 785 .dfa_size_limit(50 * (1 << 20))
786 786 .build()
787 787 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
788 788
789 789 Ok(RegexMatcher {
790 790 base: re,
791 791 local: Default::default(),
792 792 })
793 793 }
794 794
795 795 /// Returns the regex pattern and a function that matches an `HgPath` against
796 796 /// said regex formed by the given ignore patterns.
797 797 fn build_regex_match<'a>(
798 798 ignore_patterns: &[IgnorePattern],
799 799 glob_suffix: &[u8],
800 800 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
801 801 let mut regexps = vec![];
802 802 let mut exact_set = HashSet::new();
803 803
804 804 for pattern in ignore_patterns {
805 805 if let Some(re) = build_single_regex(pattern, glob_suffix)? {
806 806 regexps.push(re);
807 807 } else {
808 808 let exact = normalize_path_bytes(&pattern.pattern);
809 809 exact_set.insert(HgPathBuf::from_bytes(&exact));
810 810 }
811 811 }
812 812
813 813 let full_regex = regexps.join(&b'|');
814 814
815 815 // An empty pattern would cause the regex engine to incorrectly match the
816 816 // (empty) root directory
817 817 let func = if !(regexps.is_empty()) {
818 818 let matcher = re_matcher(&full_regex)?;
819 819 let func = move |filename: &HgPath| {
820 820 exact_set.contains(filename) || matcher.is_match(filename)
821 821 };
822 822 Box::new(func) as IgnoreFnType
823 823 } else {
824 824 let func = move |filename: &HgPath| exact_set.contains(filename);
825 825 Box::new(func) as IgnoreFnType
826 826 };
827 827
828 828 Ok((full_regex, func))
829 829 }
830 830
831 831 /// Returns roots and directories corresponding to each pattern.
832 832 ///
833 833 /// This calculates the roots and directories exactly matching the patterns and
834 834 /// returns a tuple of (roots, dirs). It does not return other directories
835 835 /// which may also need to be considered, like the parent directories.
836 836 fn roots_and_dirs(
837 837 ignore_patterns: &[IgnorePattern],
838 838 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
839 839 let mut roots = Vec::new();
840 840 let mut dirs = Vec::new();
841 841
842 842 for ignore_pattern in ignore_patterns {
843 843 let IgnorePattern {
844 844 syntax, pattern, ..
845 845 } = ignore_pattern;
846 846 match syntax {
847 847 PatternSyntax::RootGlob | PatternSyntax::Glob => {
848 848 let mut root = HgPathBuf::new();
849 849 for p in pattern.split(|c| *c == b'/') {
850 850 if p.iter()
851 851 .any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?'))
852 852 {
853 853 break;
854 854 }
855 855 root.push(HgPathBuf::from_bytes(p).as_ref());
856 856 }
857 857 roots.push(root);
858 858 }
859 859 PatternSyntax::Path
860 860 | PatternSyntax::RelPath
861 861 | PatternSyntax::FilePath => {
862 862 let pat = HgPath::new(if pattern == b"." {
863 863 &[] as &[u8]
864 864 } else {
865 865 pattern
866 866 });
867 867 roots.push(pat.to_owned());
868 868 }
869 PatternSyntax::RootFiles => {
869 PatternSyntax::RootFilesIn => {
870 870 let pat = if pattern == b"." {
871 871 &[] as &[u8]
872 872 } else {
873 873 pattern
874 874 };
875 875 dirs.push(HgPathBuf::from_bytes(pat));
876 876 }
877 877 _ => {
878 878 roots.push(HgPathBuf::new());
879 879 }
880 880 }
881 881 }
882 882 (roots, dirs)
883 883 }
884 884
/// Paths extracted from patterns, as computed by `roots_dirs_and_parents`.
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories to match recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories to match non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Implicitly required directories to go to items in either roots or dirs
    pub parents: HashSet<HgPathBuf>,
}
895 895
896 896 /// Extract roots, dirs and parents from patterns.
///
/// `roots` and `dirs` are computed by `roots_and_dirs` and then deduplicated
/// into sets. `parents` is the union of the entries yielded by
/// `DirsMultiset::from_manifest` for `dirs` and for `roots` (presumably the
/// ancestor directories of each entry -- confirm against `DirsMultiset`).
///
/// # Errors
///
/// Propagates any error returned by `DirsMultiset::from_manifest`.
897 897 fn roots_dirs_and_parents(
898 898 ignore_patterns: &[IgnorePattern],
899 899 ) -> PatternResult<RootsDirsAndParents> {
900 900 let (roots, dirs) = roots_and_dirs(ignore_patterns);
901 901
902 902 let mut parents = HashSet::new();
903 903
// Entries derived from the non-recursive directories...
904 904 parents.extend(
905 905 DirsMultiset::from_manifest(&dirs)?
906 906 .iter()
907 907 .map(ToOwned::to_owned),
908 908 );
// ...and from the recursive roots, merged into the same set.
909 909 parents.extend(
910 910 DirsMultiset::from_manifest(&roots)?
911 911 .iter()
912 912 .map(ToOwned::to_owned),
913 913 );
914 914
915 915 Ok(RootsDirsAndParents {
916 916 roots: HashSet::from_iter(roots),
917 917 dirs: HashSet::from_iter(dirs),
918 918 parents,
919 919 })
920 920 }
921 921
922 922 /// Returns a function that checks whether a given file (in the general sense)
923 923 /// should be matched.
///
/// The result is a `(patterns, match_fn)` pair. `patterns` is a byte string
/// kept for debugging and printing: either `rootfilesin: ` followed by the
/// sorted directories, or whatever `build_regex_match` returned. Subincludes
/// do not contribute to `patterns`. `match_fn` returns `true` as soon as one
/// of the underlying matchers (subincludes, then the remaining patterns)
/// accepts the path.
///
/// `glob_suffix` is only forwarded to `build_regex_match`.
///
/// # Errors
///
/// Propagates errors from `filter_subincludes`, `IncludeMatcher::new` and
/// `build_regex_match`.
924 924 fn build_match<'a>(
925 925 ignore_patterns: Vec<IgnorePattern>,
926 926 glob_suffix: &[u8],
927 927 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> {
928 928 let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![];
929 929 // For debugging and printing
930 930 let mut patterns = vec![];
931 931
// From here on `ignore_patterns` is shadowed by the second element returned
// by `filter_subincludes`; the subincludes are handled separately below.
932 932 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
933 933
934 934 if !subincludes.is_empty() {
935 935 // Build prefix-based matcher functions for subincludes
936 936 let mut submatchers = FastHashMap::default();
937 937 let mut prefixes = vec![];
938 938
939 939 for sub_include in subincludes {
940 940 let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
941 941 let match_fn =
942 942 Box::new(move |path: &HgPath| matcher.matches(path));
943 943 prefixes.push(sub_include.prefix.clone());
944 944 submatchers.insert(sub_include.prefix.clone(), match_fn);
945 945 }
946 946
// A file matches when, for some prefix it is relative to, the matcher
// registered for that prefix accepts the path relative to the prefix.
947 947 let match_subinclude = move |filename: &HgPath| {
948 948 for prefix in prefixes.iter() {
949 949 if let Some(rel) = filename.relative_to(prefix) {
950 950 if (submatchers[prefix])(rel) {
951 951 return true;
952 952 }
953 953 }
954 954 }
955 955 false
956 956 };
957 957
958 958 match_funcs.push(Box::new(match_subinclude));
959 959 }
960 960
961 961 if !ignore_patterns.is_empty() {
962 962 // Either do dumb matching if all patterns are rootfiles, or match
963 963 // with a regex.
964 964 if ignore_patterns
965 965 .iter()
966 .all(|k| k.syntax == PatternSyntax::RootFiles)
966 .all(|k| k.syntax == PatternSyntax::RootFilesIn)
967 967 {
968 968 let dirs: HashSet<_> = ignore_patterns
969 969 .iter()
970 970 .map(|k| k.pattern.to_owned())
971 971 .collect();
// Separate copy of the directories, sorted further down for the debug
// output; the set itself is moved into the closure.
972 972 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
973 973
// A path matches when everything before its last `/` is one of `dirs`;
// a path without any `/` is looked up as `.`.
974 974 let match_func = move |path: &HgPath| -> bool {
975 975 let path = path.as_bytes();
976 976 let i = path.iter().rposition(|a| *a == b'/');
977 977 let dir = if let Some(i) = i { &path[..i] } else { b"." };
978 978 dirs.contains(dir)
979 979 };
980 980 match_funcs.push(Box::new(match_func));
981 981
982 982 patterns.extend(b"rootfilesin: ");
983 983 dirs_vec.sort();
984 984 patterns.extend(dirs_vec.escaped_bytes());
985 985 } else {
986 986 let (new_re, match_func) =
987 987 build_regex_match(&ignore_patterns, glob_suffix)?;
988 988 patterns = new_re;
989 989 match_funcs.push(match_func)
990 990 }
991 991 }
992 992
// A single matcher is returned as is. Otherwise the matchers are combined
// with `any`; with no matcher at all this yields `false` for every path.
993 993 Ok(if match_funcs.len() == 1 {
994 994 (patterns, match_funcs.remove(0))
995 995 } else {
996 996 (
997 997 patterns,
998 998 Box::new(move |f: &HgPath| -> bool {
999 999 match_funcs.iter().any(|match_func| match_func(f))
1000 1000 }),
1001 1001 )
1002 1002 })
1003 1003 }
1004 1004
1005 1005 /// Parses all "ignore" files with their recursive includes and returns an
1006 1006 /// `IncludeMatcher` built from every pattern they contain, together with the
1007 1007 /// warnings collected while reading them.
///
/// `all_pattern_files` is sorted before being read, `root_dir` and
/// `inspect_pattern_bytes` are forwarded to `get_patterns_from_file` for each
/// file.
///
/// # Errors
///
/// Returns the first error reported by `get_patterns_from_file`, or the error
/// from `IncludeMatcher::new`.
1008 1008 pub fn get_ignore_matcher<'a>(
1009 1009 mut all_pattern_files: Vec<PathBuf>,
1010 1010 root_dir: &Path,
1011 1011 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1012 1012 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
1013 1013 let mut all_patterns = vec![];
1014 1014 let mut all_warnings = vec![];
1015 1015
1016 1016 // Sort to make the ordering of calls to `inspect_pattern_bytes`
1017 1017 // deterministic even if the ordering of `all_pattern_files` is not (such
1018 1018 // as when the iteration order of a Python dict or Rust HashMap is involved).
1019 1019 // Sort by "string" representation instead of the default by component
1020 1020 // (with a Rust-specific definition of a component)
1021 1021 all_pattern_files
1022 1022 .sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
1023 1023
1024 1024 for pattern_file in &all_pattern_files {
1025 1025 let (patterns, warnings) = get_patterns_from_file(
1026 1026 pattern_file,
1027 1027 root_dir,
1028 1028 inspect_pattern_bytes,
1029 1029 )?;
1030 1030
1031 1031 all_patterns.extend(patterns.to_owned());
1032 1032 all_warnings.extend(warnings);
1033 1033 }
1034 1034 let matcher = IncludeMatcher::new(all_patterns)?;
1035 1035 Ok((matcher, all_warnings))
1036 1036 }
1037 1037
1038 1038 /// Parses all "ignore" files with their recursive includes and returns a
1039 1039 /// function that checks whether a given file (in the general sense) should be
1040 1040 /// ignored.
///
/// This is a thin wrapper around `get_ignore_matcher`: the matcher is moved
/// into a boxed closure calling `matcher.matches(path)`, and the warnings are
/// passed through unchanged. Errors from `get_ignore_matcher` are returned
/// as is.
1041 1041 pub fn get_ignore_function<'a>(
1042 1042 all_pattern_files: Vec<PathBuf>,
1043 1043 root_dir: &Path,
1044 1044 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
1045 1045 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
1046 1046 let res =
1047 1047 get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
1048 1048 res.map(|(matcher, all_warnings)| {
1049 1049 let res: IgnoreFnType<'a> =
1050 1050 Box::new(move |path: &HgPath| matcher.matches(path));
1051 1051
1052 1052 (res, all_warnings)
1053 1053 })
1054 1054 }
1055 1055
1056 1056 impl<'a> IncludeMatcher<'a> {
/// Builds a matcher from `ignore_patterns`.
///
/// The roots, dirs and parents are extracted with `roots_dirs_and_parents`,
/// and the match function is built by `build_match` with `(?:/|$)` as the
/// glob suffix. `prefix` is `true` when every pattern uses the `Path` or
/// `RelPath` syntax (which is also the case for an empty pattern list).
///
/// # Errors
///
/// Propagates errors from `roots_dirs_and_parents` and `build_match`.
1057 1057 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
1058 1058 let RootsDirsAndParents {
1059 1059 roots,
1060 1060 dirs,
1061 1061 parents,
1062 1062 } = roots_dirs_and_parents(&ignore_patterns)?;
1063 1063 let prefix = ignore_patterns.iter().all(|k| {
1064 1064 matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath)
1065 1065 });
1066 1066 let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?;
1067 1067
1068 1068 Ok(Self {
1069 1069 patterns,
1070 1070 match_fn,
1071 1071 prefix,
1072 1072 roots,
1073 1073 dirs,
1074 1074 parents,
1075 1075 })
1076 1076 }
1077 1077
/// Builds a `DirsChildrenMultiset` from all of `dirs`, `roots` and
/// `parents`, passing `parents` as the second argument of
/// `DirsChildrenMultiset::new`. The result is recomputed on every call.
1078 1078 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
1079 1079 // TODO cache
1080 1080 let thing = self
1081 1081 .dirs
1082 1082 .iter()
1083 1083 .chain(self.roots.iter())
1084 1084 .chain(self.parents.iter());
1085 1085 DirsChildrenMultiset::new(thing, Some(&self.parents))
1086 1086 }
1087 1087
/// Returns the raw `patterns` bytes stored at construction time (the first
/// element returned by `build_match`).
1088 1088 pub fn debug_get_patterns(&self) -> &[u8] {
1089 1089 self.patterns.as_ref()
1090 1090 }
1091 1091 }
1092 1092
/// Formats the matcher as `IncludeMatcher(includes='...')`, where the content
/// is the escaped form of the stored `patterns` bytes, converted to a string
/// lossily.
1093 1093 impl<'a> Display for IncludeMatcher<'a> {
1094 1094 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
1095 1095 // XXX What about exact matches?
1096 1096 // I'm not sure it's worth it to clone the HashSet and keep it
1097 1097 // around just in case someone wants to display the matcher, plus
1098 1098 // it's going to be unreadable after a few entries, but we need to
1099 1099 // inform in this display that exact matches are being used and are
1100 1100 // (on purpose) missing from the `includes`.
1101 1101 write!(
1102 1102 f,
1103 1103 "IncludeMatcher(includes='{}')",
1104 1104 String::from_utf8_lossy(&self.patterns.escaped_bytes())
1105 1105 )
1106 1106 }
1107 1107 }
1108 1108
1109 1109 #[cfg(test)]
1110 1110 mod tests {
1111 1111 use super::*;
1112 1112 use pretty_assertions::assert_eq;
1113 1113 use std::collections::BTreeMap;
1114 1114 use std::collections::BTreeSet;
1115 1115 use std::fmt::Debug;
1116 1116 use std::path::Path;
1117 1117
// Glob patterns only produce roots: one per pattern, in order, duplicates
// kept, cut at the first component containing a glob character.
1118 1118 #[test]
1119 1119 fn test_roots_and_dirs() {
1120 1120 let pats = vec![
1121 1121 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1122 1122 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1123 1123 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1124 1124 ];
1125 1125 let (roots, dirs) = roots_and_dirs(&pats);
1126 1126
1127 1127 assert_eq!(
1128 1128 roots,
1129 1129 vec!(
1130 1130 HgPathBuf::from_bytes(b"g/h"),
1131 1131 HgPathBuf::from_bytes(b"g/h"),
1132 1132 HgPathBuf::new()
1133 1133 ),
1134 1134 );
1135 1135 assert_eq!(dirs, vec!());
1136 1136 }
1137 1137
// Same patterns as `test_roots_and_dirs`: the roots are deduplicated and the
// expected parents are the empty path and `g`.
1138 1138 #[test]
1139 1139 fn test_roots_dirs_and_parents() {
1140 1140 let pats = vec![
1141 1141 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
1142 1142 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
1143 1143 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1144 1144 ];
1145 1145
1146 1146 let mut roots = HashSet::new();
1147 1147 roots.insert(HgPathBuf::from_bytes(b"g/h"));
1148 1148 roots.insert(HgPathBuf::new());
1149 1149
1150 1150 let dirs = HashSet::new();
1151 1151
1152 1152 let mut parents = HashSet::new();
1153 1153 parents.insert(HgPathBuf::new());
1154 1154 parents.insert(HgPathBuf::from_bytes(b"g"));
1155 1155
1156 1156 assert_eq!(
1157 1157 roots_dirs_and_parents(&pats).unwrap(),
1158 1158 RootsDirsAndParents {
1159 1159 roots,
1160 1160 dirs,
1161 1161 parents
1162 1162 }
1163 1163 );
1164 1164 }
1165 1165
// A `FileMatcher` over a single nested file: each ancestor directory reports
// exactly the next path component, everything else is Empty.
1167 1167 #[test]
1168 1168 fn test_filematcher_visit_children_set() {
1169 1169 // Visitchildrenset
1170 1170 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
1171 1171 let matcher = FileMatcher::new(files).unwrap();
1172 1172
1173 1173 let mut set = HashSet::new();
1174 1174 set.insert(HgPathBuf::from_bytes(b"dir"));
1175 1175 assert_eq!(
1176 1176 matcher.visit_children_set(HgPath::new(b"")),
1177 1177 VisitChildrenSet::Set(set)
1178 1178 );
1179 1179
1180 1180 let mut set = HashSet::new();
1181 1181 set.insert(HgPathBuf::from_bytes(b"subdir"));
1182 1182 assert_eq!(
1183 1183 matcher.visit_children_set(HgPath::new(b"dir")),
1184 1184 VisitChildrenSet::Set(set)
1185 1185 );
1186 1186
1187 1187 let mut set = HashSet::new();
1188 1188 set.insert(HgPathBuf::from_bytes(b"foo.txt"));
1189 1189 assert_eq!(
1190 1190 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1191 1191 VisitChildrenSet::Set(set)
1192 1192 );
1193 1193
// Paths below the file, the file itself and unrelated directories have
// nothing to visit.
1194 1194 assert_eq!(
1195 1195 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1196 1196 VisitChildrenSet::Empty
1197 1197 );
1198 1198 assert_eq!(
1199 1199 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
1200 1200 VisitChildrenSet::Empty
1201 1201 );
1202 1202 assert_eq!(
1203 1203 matcher.visit_children_set(HgPath::new(b"folder")),
1204 1204 VisitChildrenSet::Empty
1205 1205 );
1206 1206 }
1206 1206
// A `FileMatcher` over files at several depths: each directory reports both
// the files it directly contains and the sub-directories leading to deeper
// files.
1207 1207 #[test]
1208 1208 fn test_filematcher_visit_children_set_files_and_dirs() {
1209 1209 let files = vec![
1210 1210 HgPathBuf::from_bytes(b"rootfile.txt"),
1211 1211 HgPathBuf::from_bytes(b"a/file1.txt"),
1212 1212 HgPathBuf::from_bytes(b"a/b/file2.txt"),
1213 1213 // No file in a/b/c
1214 1214 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
1215 1215 ];
1216 1216 let matcher = FileMatcher::new(files).unwrap();
1217 1217
1218 1218 let mut set = HashSet::new();
1219 1219 set.insert(HgPathBuf::from_bytes(b"a"));
1220 1220 set.insert(HgPathBuf::from_bytes(b"rootfile.txt"));
1221 1221 assert_eq!(
1222 1222 matcher.visit_children_set(HgPath::new(b"")),
1223 1223 VisitChildrenSet::Set(set)
1224 1224 );
1225 1225
1226 1226 let mut set = HashSet::new();
1227 1227 set.insert(HgPathBuf::from_bytes(b"b"));
1228 1228 set.insert(HgPathBuf::from_bytes(b"file1.txt"));
1229 1229 assert_eq!(
1230 1230 matcher.visit_children_set(HgPath::new(b"a")),
1231 1231 VisitChildrenSet::Set(set)
1232 1232 );
1233 1233
1234 1234 let mut set = HashSet::new();
1235 1235 set.insert(HgPathBuf::from_bytes(b"c"));
1236 1236 set.insert(HgPathBuf::from_bytes(b"file2.txt"));
1237 1237 assert_eq!(
1238 1238 matcher.visit_children_set(HgPath::new(b"a/b")),
1239 1239 VisitChildrenSet::Set(set)
1240 1240 );
1241 1241
// `a/b/c` holds no file itself, only the directory leading to `file4.txt`.
1242 1242 let mut set = HashSet::new();
1243 1243 set.insert(HgPathBuf::from_bytes(b"d"));
1244 1244 assert_eq!(
1245 1245 matcher.visit_children_set(HgPath::new(b"a/b/c")),
1246 1246 VisitChildrenSet::Set(set)
1247 1247 );
1248 1248 let mut set = HashSet::new();
1249 1249 set.insert(HgPathBuf::from_bytes(b"file4.txt"));
1250 1250 assert_eq!(
1251 1251 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
1252 1252 VisitChildrenSet::Set(set)
1253 1253 );
1254 1254
1255 1255 assert_eq!(
1256 1256 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
1257 1257 VisitChildrenSet::Empty
1258 1258 );
1259 1259 assert_eq!(
1260 1260 matcher.visit_children_set(HgPath::new(b"folder")),
1261 1261 VisitChildrenSet::Empty
1262 1262 );
1263 1263 }
1264 1264
// Exercises `PatternMatcher::visit_children_set` for the Path, rootfilesin,
// Glob and FilePath syntaxes. Each syntax is covered by two sections
// ("Visitdir..." and "Visitchildrenset...") which build the same matcher;
// NOTE(review): the paired sections assert the same values and differ at
// most in the order of the assertions.
1265 1265 #[test]
1266 1266 fn test_patternmatcher() {
1267 1267 // VisitdirPrefix
1268 1268 let m = PatternMatcher::new(vec![IgnorePattern::new(
1269 1269 PatternSyntax::Path,
1270 1270 b"dir/subdir",
1271 1271 Path::new(""),
1272 1272 )])
1273 1273 .unwrap();
1274 1274 assert_eq!(
1275 1275 m.visit_children_set(HgPath::new(b"")),
1276 1276 VisitChildrenSet::This
1277 1277 );
1278 1278 assert_eq!(
1279 1279 m.visit_children_set(HgPath::new(b"dir")),
1280 1280 VisitChildrenSet::This
1281 1281 );
1282 1282 assert_eq!(
1283 1283 m.visit_children_set(HgPath::new(b"dir/subdir")),
1284 1284 VisitChildrenSet::Recursive
1285 1285 );
1286 1286 // OPT: This should probably be Recursive if its parent is?
1287 1287 assert_eq!(
1288 1288 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1289 1289 VisitChildrenSet::This
1290 1290 );
1291 1291 assert_eq!(
1292 1292 m.visit_children_set(HgPath::new(b"folder")),
1293 1293 VisitChildrenSet::Empty
1294 1294 );
1295 1295
1296 1296 // VisitchildrensetPrefix
1297 1297 let m = PatternMatcher::new(vec![IgnorePattern::new(
1298 1298 PatternSyntax::Path,
1299 1299 b"dir/subdir",
1300 1300 Path::new(""),
1301 1301 )])
1302 1302 .unwrap();
1303 1303 assert_eq!(
1304 1304 m.visit_children_set(HgPath::new(b"")),
1305 1305 VisitChildrenSet::This
1306 1306 );
1307 1307 assert_eq!(
1308 1308 m.visit_children_set(HgPath::new(b"dir")),
1309 1309 VisitChildrenSet::This
1310 1310 );
1311 1311 assert_eq!(
1312 1312 m.visit_children_set(HgPath::new(b"dir/subdir")),
1313 1313 VisitChildrenSet::Recursive
1314 1314 );
1315 1315 // OPT: This should probably be Recursive if its parent is?
1316 1316 assert_eq!(
1317 1317 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1318 1318 VisitChildrenSet::This
1319 1319 );
1320 1320 assert_eq!(
1321 1321 m.visit_children_set(HgPath::new(b"folder")),
1322 1322 VisitChildrenSet::Empty
1323 1323 );
1324 1324
1325 1325 // VisitdirRootfilesin
1326 1326 let m = PatternMatcher::new(vec![IgnorePattern::new(
1327 PatternSyntax::RootFiles,
1327 PatternSyntax::RootFilesIn,
1328 1328 b"dir/subdir",
1329 1329 Path::new(""),
1330 1330 )])
1331 1331 .unwrap();
1332 1332 assert_eq!(
1333 1333 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1334 1334 VisitChildrenSet::Empty
1335 1335 );
1336 1336 assert_eq!(
1337 1337 m.visit_children_set(HgPath::new(b"folder")),
1338 1338 VisitChildrenSet::Empty
1339 1339 );
1340 1340 // FIXME: These should probably be This.
1341 1341 assert_eq!(
1342 1342 m.visit_children_set(HgPath::new(b"")),
1343 1343 VisitChildrenSet::Empty
1344 1344 );
1345 1345 assert_eq!(
1346 1346 m.visit_children_set(HgPath::new(b"dir")),
1347 1347 VisitChildrenSet::Empty
1348 1348 );
1349 1349 assert_eq!(
1350 1350 m.visit_children_set(HgPath::new(b"dir/subdir")),
1351 1351 VisitChildrenSet::Empty
1352 1352 );
1353 1353
1354 1354 // VisitchildrensetRootfilesin
1355 1355 let m = PatternMatcher::new(vec![IgnorePattern::new(
1356 PatternSyntax::RootFiles,
1356 PatternSyntax::RootFilesIn,
1357 1357 b"dir/subdir",
1358 1358 Path::new(""),
1359 1359 )])
1360 1360 .unwrap();
1361 1361 assert_eq!(
1362 1362 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1363 1363 VisitChildrenSet::Empty
1364 1364 );
1365 1365 assert_eq!(
1366 1366 m.visit_children_set(HgPath::new(b"folder")),
1367 1367 VisitChildrenSet::Empty
1368 1368 );
1369 1369 // FIXME: These should probably be {'dir'}, {'subdir'} and This,
1370 1370 // respectively, or at least This for all three.
1371 1371 assert_eq!(
1372 1372 m.visit_children_set(HgPath::new(b"")),
1373 1373 VisitChildrenSet::Empty
1374 1374 );
1375 1375 assert_eq!(
1376 1376 m.visit_children_set(HgPath::new(b"dir")),
1377 1377 VisitChildrenSet::Empty
1378 1378 );
1379 1379 assert_eq!(
1380 1380 m.visit_children_set(HgPath::new(b"dir/subdir")),
1381 1381 VisitChildrenSet::Empty
1382 1382 );
1383 1383
1384 1384 // VisitdirGlob
1385 1385 let m = PatternMatcher::new(vec![IgnorePattern::new(
1386 1386 PatternSyntax::Glob,
1387 1387 b"dir/z*",
1388 1388 Path::new(""),
1389 1389 )])
1390 1390 .unwrap();
1391 1391 assert_eq!(
1392 1392 m.visit_children_set(HgPath::new(b"")),
1393 1393 VisitChildrenSet::This
1394 1394 );
1395 1395 // FIXME: This probably should be This
1396 1396 assert_eq!(
1397 1397 m.visit_children_set(HgPath::new(b"dir")),
1398 1398 VisitChildrenSet::Empty
1399 1399 );
1400 1400 assert_eq!(
1401 1401 m.visit_children_set(HgPath::new(b"folder")),
1402 1402 VisitChildrenSet::Empty
1403 1403 );
1404 1404 // OPT: these should probably be Empty.
1405 1405 assert_eq!(
1406 1406 m.visit_children_set(HgPath::new(b"dir/subdir")),
1407 1407 VisitChildrenSet::This
1408 1408 );
1409 1409 assert_eq!(
1410 1410 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1411 1411 VisitChildrenSet::This
1412 1412 );
1413 1413
1414 1414 // VisitchildrensetGlob
1415 1415 let m = PatternMatcher::new(vec![IgnorePattern::new(
1416 1416 PatternSyntax::Glob,
1417 1417 b"dir/z*",
1418 1418 Path::new(""),
1419 1419 )])
1420 1420 .unwrap();
1421 1421 assert_eq!(
1422 1422 m.visit_children_set(HgPath::new(b"")),
1423 1423 VisitChildrenSet::This
1424 1424 );
1425 1425 assert_eq!(
1426 1426 m.visit_children_set(HgPath::new(b"folder")),
1427 1427 VisitChildrenSet::Empty
1428 1428 );
1429 1429 // FIXME: This probably should be This
1430 1430 assert_eq!(
1431 1431 m.visit_children_set(HgPath::new(b"dir")),
1432 1432 VisitChildrenSet::Empty
1433 1433 );
1434 1434 // OPT: these should probably be Empty
1435 1435 assert_eq!(
1436 1436 m.visit_children_set(HgPath::new(b"dir/subdir")),
1437 1437 VisitChildrenSet::This
1438 1438 );
1439 1439 assert_eq!(
1440 1440 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1441 1441 VisitChildrenSet::This
1442 1442 );
1443 1443
1444 1444 // VisitdirFilepath
1445 1445 let m = PatternMatcher::new(vec![IgnorePattern::new(
1446 1446 PatternSyntax::FilePath,
1447 1447 b"dir/z",
1448 1448 Path::new(""),
1449 1449 )])
1450 1450 .unwrap();
1451 1451 assert_eq!(
1452 1452 m.visit_children_set(HgPath::new(b"")),
1453 1453 VisitChildrenSet::This
1454 1454 );
1455 1455 assert_eq!(
1456 1456 m.visit_children_set(HgPath::new(b"dir")),
1457 1457 VisitChildrenSet::This
1458 1458 );
1459 1459 assert_eq!(
1460 1460 m.visit_children_set(HgPath::new(b"folder")),
1461 1461 VisitChildrenSet::Empty
1462 1462 );
1463 1463 assert_eq!(
1464 1464 m.visit_children_set(HgPath::new(b"dir/subdir")),
1465 1465 VisitChildrenSet::Empty
1466 1466 );
1467 1467 assert_eq!(
1468 1468 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1469 1469 VisitChildrenSet::Empty
1470 1470 );
1471 1471
1472 1472 // VisitchildrensetFilepath
1473 1473 let m = PatternMatcher::new(vec![IgnorePattern::new(
1474 1474 PatternSyntax::FilePath,
1475 1475 b"dir/z",
1476 1476 Path::new(""),
1477 1477 )])
1478 1478 .unwrap();
1479 1479 assert_eq!(
1480 1480 m.visit_children_set(HgPath::new(b"")),
1481 1481 VisitChildrenSet::This
1482 1482 );
1483 1483 assert_eq!(
1484 1484 m.visit_children_set(HgPath::new(b"folder")),
1485 1485 VisitChildrenSet::Empty
1486 1486 );
1487 1487 assert_eq!(
1488 1488 m.visit_children_set(HgPath::new(b"dir")),
1489 1489 VisitChildrenSet::This
1490 1490 );
1491 1491 assert_eq!(
1492 1492 m.visit_children_set(HgPath::new(b"dir/subdir")),
1493 1493 VisitChildrenSet::Empty
1494 1494 );
1495 1495 assert_eq!(
1496 1496 m.visit_children_set(HgPath::new(b"dir/subdir/x")),
1497 1497 VisitChildrenSet::Empty
1498 1498 );
1499 1499 }
1500 1500
// Exercises `IncludeMatcher::visit_children_set` for the RelPath,
// rootfilesin, Glob and FilePath syntaxes, then for a mix of syntaxes and for
// a glob starting with `**`.
1501 1501 #[test]
1502 1502 fn test_includematcher() {
1503 1503 // VisitchildrensetPrefix
1504 1504 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1505 1505 PatternSyntax::RelPath,
1506 1506 b"dir/subdir",
1507 1507 Path::new(""),
1508 1508 )])
1509 1509 .unwrap();
1510 1510
1511 1511 let mut set = HashSet::new();
1512 1512 set.insert(HgPathBuf::from_bytes(b"dir"));
1513 1513 assert_eq!(
1514 1514 matcher.visit_children_set(HgPath::new(b"")),
1515 1515 VisitChildrenSet::Set(set)
1516 1516 );
1517 1517
1518 1518 let mut set = HashSet::new();
1519 1519 set.insert(HgPathBuf::from_bytes(b"subdir"));
1520 1520 assert_eq!(
1521 1521 matcher.visit_children_set(HgPath::new(b"dir")),
1522 1522 VisitChildrenSet::Set(set)
1523 1523 );
1524 1524 assert_eq!(
1525 1525 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1526 1526 VisitChildrenSet::Recursive
1527 1527 );
1528 1528 // OPT: This should probably be Recursive if its parent is?
1529 1529 assert_eq!(
1530 1530 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1531 1531 VisitChildrenSet::This
1532 1532 );
1533 1533 assert_eq!(
1534 1534 matcher.visit_children_set(HgPath::new(b"folder")),
1535 1535 VisitChildrenSet::Empty
1536 1536 );
1537 1537
1538 1538 // VisitchildrensetRootfilesin
1539 1539 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1540 PatternSyntax::RootFiles,
1540 PatternSyntax::RootFilesIn,
1541 1541 b"dir/subdir",
1542 1542 Path::new(""),
1543 1543 )])
1544 1544 .unwrap();
1545 1545
1546 1546 let mut set = HashSet::new();
1547 1547 set.insert(HgPathBuf::from_bytes(b"dir"));
1548 1548 assert_eq!(
1549 1549 matcher.visit_children_set(HgPath::new(b"")),
1550 1550 VisitChildrenSet::Set(set)
1551 1551 );
1552 1552
1553 1553 let mut set = HashSet::new();
1554 1554 set.insert(HgPathBuf::from_bytes(b"subdir"));
1555 1555 assert_eq!(
1556 1556 matcher.visit_children_set(HgPath::new(b"dir")),
1557 1557 VisitChildrenSet::Set(set)
1558 1558 );
1559 1559
1560 1560 assert_eq!(
1561 1561 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1562 1562 VisitChildrenSet::This
1563 1563 );
1564 1564 assert_eq!(
1565 1565 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1566 1566 VisitChildrenSet::Empty
1567 1567 );
1568 1568 assert_eq!(
1569 1569 matcher.visit_children_set(HgPath::new(b"folder")),
1570 1570 VisitChildrenSet::Empty
1571 1571 );
1572 1572
1573 1573 // VisitchildrensetGlob
1574 1574 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1575 1575 PatternSyntax::Glob,
1576 1576 b"dir/z*",
1577 1577 Path::new(""),
1578 1578 )])
1579 1579 .unwrap();
1580 1580
1581 1581 let mut set = HashSet::new();
1582 1582 set.insert(HgPathBuf::from_bytes(b"dir"));
1583 1583 assert_eq!(
1584 1584 matcher.visit_children_set(HgPath::new(b"")),
1585 1585 VisitChildrenSet::Set(set)
1586 1586 );
1587 1587 assert_eq!(
1588 1588 matcher.visit_children_set(HgPath::new(b"folder")),
1589 1589 VisitChildrenSet::Empty
1590 1590 );
1591 1591 assert_eq!(
1592 1592 matcher.visit_children_set(HgPath::new(b"dir")),
1593 1593 VisitChildrenSet::This
1594 1594 );
1595 1595 // OPT: these should probably be Empty.
1596 1596 assert_eq!(
1597 1597 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1598 1598 VisitChildrenSet::This
1599 1599 );
1600 1600 assert_eq!(
1601 1601 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1602 1602 VisitChildrenSet::This
1603 1603 );
1604 1604
1605 1605 // VisitchildrensetFilePath
1606 1606 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1607 1607 PatternSyntax::FilePath,
1608 1608 b"dir/z",
1609 1609 Path::new(""),
1610 1610 )])
1611 1611 .unwrap();
1612 1612
1613 1613 let mut set = HashSet::new();
1614 1614 set.insert(HgPathBuf::from_bytes(b"dir"));
1615 1615 assert_eq!(
1616 1616 matcher.visit_children_set(HgPath::new(b"")),
1617 1617 VisitChildrenSet::Set(set)
1618 1618 );
1619 1619 assert_eq!(
1620 1620 matcher.visit_children_set(HgPath::new(b"folder")),
1621 1621 VisitChildrenSet::Empty
1622 1622 );
1623 1623 let mut set = HashSet::new();
1624 1624 set.insert(HgPathBuf::from_bytes(b"z"));
1625 1625 assert_eq!(
1626 1626 matcher.visit_children_set(HgPath::new(b"dir")),
1627 1627 VisitChildrenSet::Set(set)
1628 1628 );
1629 1629 // OPT: these should probably be set().
// NOTE(review): the remark above looks stale, both assertions below already
// expect Empty.
1630 1630 assert_eq!(
1631 1631 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1632 1632 VisitChildrenSet::Empty
1633 1633 );
1634 1634 assert_eq!(
1635 1635 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1636 1636 VisitChildrenSet::Empty
1637 1637 );
1638 1638
1639 1639 // Test multiple patterns
1640 1640 let matcher = IncludeMatcher::new(vec![
1641 1641 IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
1642 1642 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
1643 1643 ])
1644 1644 .unwrap();
1645 1645
1646 1646 assert_eq!(
1647 1647 matcher.visit_children_set(HgPath::new(b"")),
1648 1648 VisitChildrenSet::This
1649 1649 );
1650 1650
1651 1651 // Test a single glob pattern starting with `**`
1652 1652 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
1653 1653 PatternSyntax::Glob,
1654 1654 b"**/*.exe",
1655 1655 Path::new(""),
1656 1656 )])
1657 1657 .unwrap();
1658 1658
1659 1659 assert_eq!(
1660 1660 matcher.visit_children_set(HgPath::new(b"")),
1661 1661 VisitChildrenSet::This
1662 1662 );
1663 1663 }
1664 1664
// Exercises `UnionMatcher::visit_children_set` over pairs of
// `IncludeMatcher`s: a path plus a rootfilesin pattern, two unrelated paths,
// and a path plus one of its sub-paths.
1665 1665 #[test]
1666 1666 fn test_unionmatcher() {
1667 1667 // Path + Rootfiles
1668 1668 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1669 1669 PatternSyntax::RelPath,
1670 1670 b"dir/subdir",
1671 1671 Path::new(""),
1672 1672 )])
1673 1673 .unwrap();
1674 1674 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1675 PatternSyntax::RootFiles,
1675 PatternSyntax::RootFilesIn,
1676 1676 b"dir",
1677 1677 Path::new(""),
1678 1678 )])
1679 1679 .unwrap();
1680 1680 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1681 1681
1682 1682 let mut set = HashSet::new();
1683 1683 set.insert(HgPathBuf::from_bytes(b"dir"));
1684 1684 assert_eq!(
1685 1685 matcher.visit_children_set(HgPath::new(b"")),
1686 1686 VisitChildrenSet::Set(set)
1687 1687 );
1688 1688 assert_eq!(
1689 1689 matcher.visit_children_set(HgPath::new(b"dir")),
1690 1690 VisitChildrenSet::This
1691 1691 );
1692 1692 assert_eq!(
1693 1693 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1694 1694 VisitChildrenSet::Recursive
1695 1695 );
1696 1696 assert_eq!(
1697 1697 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1698 1698 VisitChildrenSet::Empty
1699 1699 );
1700 1700 assert_eq!(
1701 1701 matcher.visit_children_set(HgPath::new(b"folder")),
1702 1702 VisitChildrenSet::Empty
1703 1703 );
// NOTE(review): this repeats the `folder` assertion just above.
1704 1704 assert_eq!(
1705 1705 matcher.visit_children_set(HgPath::new(b"folder")),
1706 1706 VisitChildrenSet::Empty
1707 1707 );
1708 1708
1709 1709 // OPT: These next two could be Recursive instead of This.
1710 1710 assert_eq!(
1711 1711 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1712 1712 VisitChildrenSet::This
1713 1713 );
1714 1714 assert_eq!(
1715 1715 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1716 1716 VisitChildrenSet::This
1717 1717 );
1718 1718
1719 1719 // Path + unrelated Path
1720 1720 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1721 1721 PatternSyntax::RelPath,
1722 1722 b"dir/subdir",
1723 1723 Path::new(""),
1724 1724 )])
1725 1725 .unwrap();
1726 1726 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1727 1727 PatternSyntax::RelPath,
1728 1728 b"folder",
1729 1729 Path::new(""),
1730 1730 )])
1731 1731 .unwrap();
1732 1732 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1733 1733
1734 1734 let mut set = HashSet::new();
1735 1735 set.insert(HgPathBuf::from_bytes(b"folder"));
1736 1736 set.insert(HgPathBuf::from_bytes(b"dir"));
1737 1737 assert_eq!(
1738 1738 matcher.visit_children_set(HgPath::new(b"")),
1739 1739 VisitChildrenSet::Set(set)
1740 1740 );
1741 1741 let mut set = HashSet::new();
1742 1742 set.insert(HgPathBuf::from_bytes(b"subdir"));
1743 1743 assert_eq!(
1744 1744 matcher.visit_children_set(HgPath::new(b"dir")),
1745 1745 VisitChildrenSet::Set(set)
1746 1746 );
1747 1747
1748 1748 assert_eq!(
1749 1749 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1750 1750 VisitChildrenSet::Recursive
1751 1751 );
1752 1752 assert_eq!(
1753 1753 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1754 1754 VisitChildrenSet::Empty
1755 1755 );
1756 1756
1757 1757 assert_eq!(
1758 1758 matcher.visit_children_set(HgPath::new(b"folder")),
1759 1759 VisitChildrenSet::Recursive
1760 1760 );
1761 1761 // OPT: These next two could be Recursive instead of This.
1762 1762 assert_eq!(
1763 1763 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1764 1764 VisitChildrenSet::This
1765 1765 );
1766 1766 assert_eq!(
1767 1767 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1768 1768 VisitChildrenSet::This
1769 1769 );
1770 1770
1771 1771 // Path + subpath
1772 1772 let m1 = IncludeMatcher::new(vec![IgnorePattern::new(
1773 1773 PatternSyntax::RelPath,
1774 1774 b"dir/subdir/x",
1775 1775 Path::new(""),
1776 1776 )])
1777 1777 .unwrap();
1778 1778 let m2 = IncludeMatcher::new(vec![IgnorePattern::new(
1779 1779 PatternSyntax::RelPath,
1780 1780 b"dir/subdir",
1781 1781 Path::new(""),
1782 1782 )])
1783 1783 .unwrap();
1784 1784 let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
1785 1785
1786 1786 let mut set = HashSet::new();
1787 1787 set.insert(HgPathBuf::from_bytes(b"dir"));
1788 1788 assert_eq!(
1789 1789 matcher.visit_children_set(HgPath::new(b"")),
1790 1790 VisitChildrenSet::Set(set)
1791 1791 );
1792 1792 let mut set = HashSet::new();
1793 1793 set.insert(HgPathBuf::from_bytes(b"subdir"));
1794 1794 assert_eq!(
1795 1795 matcher.visit_children_set(HgPath::new(b"dir")),
1796 1796 VisitChildrenSet::Set(set)
1797 1797 );
1798 1798
1799 1799 assert_eq!(
1800 1800 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
1801 1801 VisitChildrenSet::Recursive
1802 1802 );
1803 1803 assert_eq!(
1804 1804 matcher.visit_children_set(HgPath::new(b"dir/foo")),
1805 1805 VisitChildrenSet::Empty
1806 1806 );
1807 1807
1808 1808 assert_eq!(
1809 1809 matcher.visit_children_set(HgPath::new(b"folder")),
1810 1810 VisitChildrenSet::Empty
1811 1811 );
1812 1812 assert_eq!(
1813 1813 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
1814 1814 VisitChildrenSet::Recursive
1815 1815 );
1816 1816 // OPT: this should probably be Recursive, not This.
1817 1817 assert_eq!(
1818 1818 matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
1819 1819 VisitChildrenSet::This
1820 1820 );
1821 1821 }
1822 1822
/// Checks `IntersectionMatcher::visit_children_set` for four pairs of
/// include matchers: a path intersected with the root files of a directory,
/// two unrelated paths, two nested paths and two diverging sibling paths.
#[test]
fn test_intersectionmatcher() {
    /// Shorthand for `m.visit_children_set(HgPath::new(dir))`.
    fn visit(m: &impl Matcher, dir: &[u8]) -> VisitChildrenSet {
        m.visit_children_set(HgPath::new(dir))
    }

    /// Builds a `VisitChildrenSet::Set` holding the single child `name`.
    fn only_child(name: &[u8]) -> VisitChildrenSet {
        let mut children = HashSet::new();
        children.insert(HgPathBuf::from_bytes(name));
        VisitChildrenSet::Set(children)
    }

    // Include path + Include rootfiles
    let m1 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap(),
    );
    let m2 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RootFilesIn,
            b"dir",
            Path::new(""),
        )])
        .unwrap(),
    );
    let matcher = IntersectionMatcher::new(m1, m2);

    assert_eq!(visit(&matcher, b""), only_child(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(&matcher, b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::Empty);

    // Non intersecting paths
    let m1 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap(),
    );
    let m2 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"folder",
            Path::new(""),
        )])
        .unwrap(),
    );
    let matcher = IntersectionMatcher::new(m1, m2);

    // Nothing can match, so every directory must be pruned.
    let probes: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ];
    for dir in probes.iter() {
        assert_eq!(visit(&matcher, dir), VisitChildrenSet::Empty);
    }

    // Nested paths
    let m1 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir/x",
            Path::new(""),
        )])
        .unwrap(),
    );
    let m2 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap(),
    );
    let matcher = IntersectionMatcher::new(m1, m2);

    assert_eq!(visit(&matcher, b""), only_child(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), only_child(b"subdir"));
    assert_eq!(visit(&matcher, b"dir/subdir"), only_child(b"x"));
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::Empty);
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::This);

    // Diverging paths
    let m1 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir/x",
            Path::new(""),
        )])
        .unwrap(),
    );
    let m2 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir/z",
            Path::new(""),
        )])
        .unwrap(),
    );
    let matcher = IntersectionMatcher::new(m1, m2);

    // OPT: these next two could probably be Empty as well.
    assert_eq!(visit(&matcher, b""), only_child(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), only_child(b"subdir"));
    assert_eq!(visit(&matcher, b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::Empty);
}
2033 2033
/// Checks `DifferenceMatcher::visit_children_set` for always/always,
/// always/never and for a pair of include matchers.
#[test]
fn test_differencematcher() {
    /// Shorthand for `m.visit_children_set(HgPath::new(dir))`.
    fn visit(m: &impl Matcher, dir: &[u8]) -> VisitChildrenSet {
        m.visit_children_set(HgPath::new(dir))
    }

    /// Builds a `VisitChildrenSet::Set` holding the single child `name`.
    fn only_child(name: &[u8]) -> VisitChildrenSet {
        let mut children = HashSet::new();
        children.insert(HgPathBuf::from_bytes(name));
        VisitChildrenSet::Set(children)
    }

    // Directories probed by the two always/never scenarios below.
    let probes: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/subdir/z",
        b"dir/foo",
        b"dir/subdir/x",
        b"folder",
    ];

    // Two alwaysmatchers should function like a nevermatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(AlwaysMatcher),
    );
    for case in probes.iter() {
        assert_eq!(visit(&matcher, case), VisitChildrenSet::Empty);
    }

    // One always and one never should behave the same as an always
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(NeverMatcher),
    );
    for case in probes.iter() {
        assert_eq!(visit(&matcher, case), VisitChildrenSet::Recursive);
    }

    // Two include matchers
    let m1 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new("/repo"),
        )])
        .unwrap(),
    );
    let m2 = Box::new(
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RootFilesIn,
            b"dir",
            Path::new("/repo"),
        )])
        .unwrap(),
    );
    let matcher = DifferenceMatcher::new(m1, m2);

    assert_eq!(visit(&matcher, b""), only_child(b"dir"));
    assert_eq!(visit(&matcher, b"dir"), only_child(b"subdir"));
    assert_eq!(visit(&matcher, b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(&matcher, b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(&matcher, b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(&matcher, b"dir/subdir/x"), VisitChildrenSet::This);
}
2130 2130
2131 2131 mod invariants {
2132 2132 pub mod visit_children_set {
2133 2133
2134 2134 use crate::{
2135 2135 matchers::{tests::Tree, Matcher, VisitChildrenSet},
2136 2136 utils::hg_path::HgPath,
2137 2137 };
2138 2138
2139 2139 #[allow(dead_code)]
2140 2140 #[derive(Debug)]
2141 2141 struct Error<'a, M> {
2142 2142 matcher: &'a M,
2143 2143 path: &'a HgPath,
2144 2144 matching: &'a Tree,
2145 2145 visit_children_set: &'a VisitChildrenSet,
2146 2146 }
2147 2147
2148 2148 fn holds(matching: &Tree, vcs: &VisitChildrenSet) -> bool {
2149 2149 match vcs {
2150 2150 VisitChildrenSet::Empty => matching.is_empty(),
2151 2151 VisitChildrenSet::This => {
2152 2152 // `This` does not come with any obligations.
2153 2153 true
2154 2154 }
2155 2155 VisitChildrenSet::Recursive => {
2156 2156 // `Recursive` does not come with any correctness
2157 2157 // obligations.
2158 2158 // It instructs the caller to stop calling
2159 2159 // `visit_children_set` for all
2160 2160 // descendants, so may have negative performance
2161 2161 // implications, but we're not testing against that
2162 2162 // here.
2163 2163 true
2164 2164 }
2165 2165 VisitChildrenSet::Set(allowed_children) => {
2166 2166 // `allowed_children` does not distinguish between
2167 2167 // files and directories: if it's not included, it
2168 2168 // must not be matched.
2169 2169 for k in matching.dirs.keys() {
2170 2170 if !(allowed_children.contains(k)) {
2171 2171 return false;
2172 2172 }
2173 2173 }
2174 2174 for k in matching.files.iter() {
2175 2175 if !(allowed_children.contains(k)) {
2176 2176 return false;
2177 2177 }
2178 2178 }
2179 2179 true
2180 2180 }
2181 2181 }
2182 2182 }
2183 2183
2184 2184 pub fn check<M: Matcher + std::fmt::Debug>(
2185 2185 matcher: &M,
2186 2186 path: &HgPath,
2187 2187 matching: &Tree,
2188 2188 visit_children_set: &VisitChildrenSet,
2189 2189 ) {
2190 2190 if !holds(matching, visit_children_set) {
2191 2191 panic!(
2192 2192 "{:#?}",
2193 2193 Error {
2194 2194 matcher,
2195 2195 path,
2196 2196 visit_children_set,
2197 2197 matching
2198 2198 }
2199 2199 )
2200 2200 }
2201 2201 }
2202 2202 }
2203 2203 }
2204 2204
2205 2205 #[derive(Debug, Clone)]
2206 2206 pub struct Tree {
2207 2207 files: BTreeSet<HgPathBuf>,
2208 2208 dirs: BTreeMap<HgPathBuf, Tree>,
2209 2209 }
2210 2210
2211 2211 impl Tree {
2212 2212 fn len(&self) -> usize {
2213 2213 let mut n = 0;
2214 2214 n += self.files.len();
2215 2215 for d in self.dirs.values() {
2216 2216 n += d.len();
2217 2217 }
2218 2218 n
2219 2219 }
2220 2220
2221 2221 fn is_empty(&self) -> bool {
2222 2222 self.files.is_empty() && self.dirs.is_empty()
2223 2223 }
2224 2224
2225 2225 fn filter_and_check<M: Matcher + Debug>(
2226 2226 &self,
2227 2227 m: &M,
2228 2228 path: &HgPath,
2229 2229 ) -> Self {
2230 2230 let files: BTreeSet<HgPathBuf> = self
2231 2231 .files
2232 2232 .iter()
2233 2233 .filter(|v| m.matches(&path.join(v)))
2234 2234 .map(|f| f.to_owned())
2235 2235 .collect();
2236 2236 let dirs: BTreeMap<HgPathBuf, Tree> = self
2237 2237 .dirs
2238 2238 .iter()
2239 2239 .filter_map(|(k, v)| {
2240 2240 let path = path.join(k);
2241 2241 let v = v.filter_and_check(m, &path);
2242 2242 if v.is_empty() {
2243 2243 None
2244 2244 } else {
2245 2245 Some((k.to_owned(), v))
2246 2246 }
2247 2247 })
2248 2248 .collect();
2249 2249 let matching = Self { files, dirs };
2250 2250 let vcs = m.visit_children_set(path);
2251 2251 invariants::visit_children_set::check(m, path, &matching, &vcs);
2252 2252 matching
2253 2253 }
2254 2254
2255 2255 fn check_matcher<M: Matcher + Debug>(
2256 2256 &self,
2257 2257 m: &M,
2258 2258 expect_count: usize,
2259 2259 ) {
2260 2260 let res = self.filter_and_check(m, &HgPathBuf::new());
2261 2261 if expect_count != res.len() {
2262 2262 eprintln!(
2263 2263 "warning: expected {} matches, got {} for {:#?}",
2264 2264 expect_count,
2265 2265 res.len(),
2266 2266 m
2267 2267 );
2268 2268 }
2269 2269 }
2270 2270 }
2271 2271
2272 2272 fn mkdir(children: &[(&[u8], &Tree)]) -> Tree {
2273 2273 let p = HgPathBuf::from_bytes;
2274 2274 let names = [
2275 2275 p(b"a"),
2276 2276 p(b"b.txt"),
2277 2277 p(b"file.txt"),
2278 2278 p(b"c.c"),
2279 2279 p(b"c.h"),
2280 2280 p(b"dir1"),
2281 2281 p(b"dir2"),
2282 2282 p(b"subdir"),
2283 2283 ];
2284 2284 let files: BTreeSet<HgPathBuf> = BTreeSet::from(names);
2285 2285 let dirs = children
2286 2286 .iter()
2287 2287 .map(|(name, t)| (p(name), (*t).clone()))
2288 2288 .collect();
2289 2289 Tree { files, dirs }
2290 2290 }
2291 2291
2292 2292 fn make_example_tree() -> Tree {
2293 2293 let leaf = mkdir(&[]);
2294 2294 let abc = mkdir(&[(b"d", &leaf)]);
2295 2295 let ab = mkdir(&[(b"c", &abc)]);
2296 2296 let a = mkdir(&[(b"b", &ab)]);
2297 2297 let dir = mkdir(&[(b"subdir", &leaf), (b"subdir.c", &leaf)]);
2298 2298 mkdir(&[(b"dir", &dir), (b"dir1", &dir), (b"dir2", &dir), (b"a", &a)])
2299 2299 }
2300 2300
/// Runs a collection of matchers over the example tree.
///
/// For each matcher, `Tree::check_matcher` verifies at every directory that
/// `visit_children_set` is compatible with what the matcher really matches
/// (a violation panics). The expected match counts are informational: a
/// mismatch only prints a warning.
#[test]
fn test_pattern_matcher_visit_children_set() {
    let tree = make_example_tree();

    let pattern_dir1_glob_c =
        PatternMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir1/*.c",
            Path::new(""),
        )])
        .unwrap();
    // Closures rather than values: these two matchers are needed several
    // times below because the combinators take ownership of their operands.
    let pattern_dir1 = || {
        PatternMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::Path,
            b"dir1",
            Path::new(""),
        )])
        .unwrap()
    };
    let pattern_dir1_a = PatternMatcher::new(vec![IgnorePattern::new(
        PatternSyntax::Glob,
        b"dir1/a",
        Path::new(""),
    )])
    .unwrap();
    let pattern_relglob_c = || {
        PatternMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelGlob,
            b"*.c",
            Path::new(""),
        )])
        .unwrap()
    };

    let file_dir_subdir_b =
        FileMatcher::new(vec![HgPathBuf::from_bytes(b"dir/subdir/b.txt")])
            .unwrap();
    let file_abcdfile = FileMatcher::new(vec![
        HgPathBuf::from_bytes(b"file.txt"),
        HgPathBuf::from_bytes(b"a/file.txt"),
        HgPathBuf::from_bytes(b"a/b/file.txt"),
        // No file in a/b/c
        HgPathBuf::from_bytes(b"a/b/c/d/file.txt"),
    ])
    .unwrap();

    // Only constructed for now, see the TODO further down.
    let _rootfilesin_dir = PatternMatcher::new(vec![IgnorePattern::new(
        PatternSyntax::RootFilesIn,
        b"dir",
        Path::new(""),
    )])
    .unwrap();

    let pattern_filepath_dir_subdir =
        PatternMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::FilePath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap();

    let include_dir_subdir =
        IncludeMatcher::new(vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )])
        .unwrap();

    // Test a glob below a directory
    let include_glob_dir_s = IncludeMatcher::new(vec![IgnorePattern::new(
        PatternSyntax::Glob,
        b"dir/s*",
        Path::new(""),
    )])
    .unwrap();
    // Test multiple patterns
    let include_dir_and_glob_s = IncludeMatcher::new(vec![
        IgnorePattern::new(PatternSyntax::RelPath, b"dir", Path::new("")),
        IgnorePattern::new(PatternSyntax::Glob, b"s*", Path::new("")),
    ])
    .unwrap();
    // Test a recursive glob
    let include_glob_any_c = IncludeMatcher::new(vec![IgnorePattern::new(
        PatternSyntax::Glob,
        b"**/*.c",
        Path::new(""),
    )])
    .unwrap();

    tree.check_matcher(&pattern_dir1(), 25);
    tree.check_matcher(&pattern_dir1_a, 1);
    tree.check_matcher(&pattern_dir1_glob_c, 2);
    tree.check_matcher(&pattern_relglob_c(), 14);
    tree.check_matcher(&AlwaysMatcher, 112);
    tree.check_matcher(&NeverMatcher, 0);
    tree.check_matcher(
        &IntersectionMatcher::new(
            Box::new(pattern_relglob_c()),
            Box::new(pattern_dir1()),
        ),
        3,
    );
    tree.check_matcher(
        &UnionMatcher::new(vec![
            Box::new(pattern_relglob_c()),
            Box::new(pattern_dir1()),
        ]),
        36,
    );
    tree.check_matcher(
        &DifferenceMatcher::new(
            Box::new(pattern_relglob_c()),
            Box::new(pattern_dir1()),
        ),
        11,
    );
    tree.check_matcher(&file_dir_subdir_b, 1);
    tree.check_matcher(&file_abcdfile, 4);
    // TODO: check `_rootfilesin_dir` as well (expected count: 6) once the
    // corresponding bug is fixed.
    tree.check_matcher(&pattern_filepath_dir_subdir, 1);
    tree.check_matcher(&include_dir_subdir, 9);
    tree.check_matcher(&include_glob_dir_s, 17);
    tree.check_matcher(&include_dir_and_glob_s, 25);
    tree.check_matcher(&include_glob_any_c, 35);
}
2434 2434 }
General Comments 0
You need to be logged in to leave comments. Login now