##// END OF EJS Templates
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
Raphaël Gomès -
r45311:e0414fcd default
parent child Browse files
Show More
@@ -1,665 +1,660 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::fs::File;
21 21 use std::io::Read;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24 use std::vec::Vec;
25 25
26 26 lazy_static! {
27 27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 30 for byte in to_escape {
31 31 v[*byte as usize].insert(0, b'\\');
32 32 }
33 33 v
34 34 };
35 35 }
36 36
37 37 /// These are matched in order
38 38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
39 39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40 40
41 41 /// Appended to the regexp of globs
42 42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43 43
/// The kinds of pattern syntax understood by this module.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// A path relative to repository root, which is matched recursively
    Path,
    /// A path relative to cwd
    RelPath,
    /// An unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFiles,
    /// A file of patterns to read and include
    Include,
    /// A file of patterns to match against files under the same directory
    SubInclude,
}
69 69
70 70 /// Transforms a glob pattern into a regex
71 71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 72 let mut input = pat;
73 73 let mut res: Vec<u8> = vec![];
74 74 let mut group_depth = 0;
75 75
76 76 while let Some((c, rest)) = input.split_first() {
77 77 input = rest;
78 78
79 79 match c {
80 80 b'*' => {
81 81 for (source, repl) in GLOB_REPLACEMENTS {
82 82 if let Some(rest) = input.drop_prefix(source) {
83 83 input = rest;
84 84 res.extend(*repl);
85 85 break;
86 86 }
87 87 }
88 88 }
89 89 b'?' => res.extend(b"."),
90 90 b'[' => {
91 91 match input.iter().skip(1).position(|b| *b == b']') {
92 92 None => res.extend(b"\\["),
93 93 Some(end) => {
94 94 // Account for the one we skipped
95 95 let end = end + 1;
96 96
97 97 res.extend(b"[");
98 98
99 99 for (i, b) in input[..end].iter().enumerate() {
100 100 if *b == b'!' && i == 0 {
101 101 res.extend(b"^")
102 102 } else if *b == b'^' && i == 0 {
103 103 res.extend(b"\\^")
104 104 } else if *b == b'\\' {
105 105 res.extend(b"\\\\")
106 106 } else {
107 107 res.push(*b)
108 108 }
109 109 }
110 110 res.extend(b"]");
111 111 input = &input[end + 1..];
112 112 }
113 113 }
114 114 }
115 115 b'{' => {
116 116 group_depth += 1;
117 117 res.extend(b"(?:")
118 118 }
119 119 b'}' if group_depth > 0 => {
120 120 group_depth -= 1;
121 121 res.extend(b")");
122 122 }
123 123 b',' if group_depth > 0 => res.extend(b"|"),
124 124 b'\\' => {
125 125 let c = {
126 126 if let Some((c, rest)) = input.split_first() {
127 127 input = rest;
128 128 c
129 129 } else {
130 130 c
131 131 }
132 132 };
133 133 res.extend(&RE_ESCAPE[*c as usize])
134 134 }
135 135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 136 }
137 137 }
138 138 res
139 139 }
140 140
141 141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 142 pattern
143 143 .iter()
144 144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 145 .collect()
146 146 }
147 147
148 148 pub fn parse_pattern_syntax(
149 149 kind: &[u8],
150 150 ) -> Result<PatternSyntax, PatternError> {
151 151 match kind {
152 152 b"re:" => Ok(PatternSyntax::Regexp),
153 153 b"path:" => Ok(PatternSyntax::Path),
154 154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 158 b"glob:" => Ok(PatternSyntax::Glob),
159 159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 160 b"include:" => Ok(PatternSyntax::Include),
161 161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 162 _ => Err(PatternError::UnsupportedSyntax(
163 163 String::from_utf8_lossy(kind).to_string(),
164 164 )),
165 165 }
166 166 }
167 167
168 168 /// Builds the regex that corresponds to the given pattern.
169 169 /// If within a `syntax: regexp` context, returns the pattern,
170 170 /// otherwise, returns the corresponding regex.
171 171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 172 let IgnorePattern {
173 173 syntax, pattern, ..
174 174 } = entry;
175 175 if pattern.is_empty() {
176 176 return vec![];
177 177 }
178 178 match syntax {
179 179 // The `regex` crate adds `.*` to the start and end of expressions
180 180 // if there are no anchors, so add them.
181 181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
182 182 PatternSyntax::RelRegexp => {
183 183 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 184 // do not. Checking for `*` correctly triggers the same error all
185 185 // engines.
186 186 if pattern[0] == b'^' || pattern[0] == b'*' {
187 187 return pattern.to_owned();
188 188 }
189 189 [&b".*"[..], pattern].concat()
190 190 }
191 191 PatternSyntax::Path | PatternSyntax::RelPath => {
192 192 if pattern == b"." {
193 193 return vec![];
194 194 }
195 195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 196 }
197 197 PatternSyntax::RootFiles => {
198 198 let mut res = if pattern == b"." {
199 199 vec![b'^']
200 200 } else {
201 201 // Pattern is a directory name.
202 202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
203 203 };
204 204
205 205 // Anything after the pattern must be a non-directory.
206 206 res.extend(b"[^/]+$");
207 207 res.push(b'$');
208 208 res
209 209 }
210 210 PatternSyntax::RelGlob => {
211 211 let glob_re = glob_to_re(pattern);
212 212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
213 213 [b".*", rest, GLOB_SUFFIX].concat()
214 214 } else {
215 215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 216 }
217 217 }
218 218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 220 }
221 221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 222 }
223 223 }
224 224
/// Bytes that have a special meaning in a glob: a `rootglob` containing none
/// of them does not need to be compiled to a regex.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227 227
/// Normalizes a `/`-separated path given as bytes: empty and `.` components
/// are dropped and `..` components are resolved against the preceding
/// component when there is one to cancel. An empty result becomes `.`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';
    const CURRENT_DIR: &[u8] = b".";
    const PARENT_DIR: &[u8] = b"..";

    if bytes.is_empty() {
        return CURRENT_DIR.to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading = bytes.iter().take_while(|b| **b == SEP).count();
    let initial_slashes = if leading > 2 { 1 } else { leading };

    let mut components: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == SEP) {
        if component.is_empty() || component == CURRENT_DIR {
            continue;
        }
        // A `..` is kept only when there is nothing it can cancel: at the
        // start of a relative path, or right after another kept `..`.
        let keep = component != PARENT_DIR
            || (initial_slashes == 0 && components.is_empty())
            || components.last().map_or(false, |last| *last == PARENT_DIR);
        if keep {
            components.push(component);
        } else {
            // No-op when empty: `..` directly under the root stays there.
            components.pop();
        }
    }

    let mut normalized = vec![SEP; initial_slashes];
    normalized.extend(components.join(&SEP));

    if normalized.is_empty() {
        CURRENT_DIR.to_vec()
    } else {
        normalized
    }
}
269 269
270 270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 271 /// that don't need to be transformed into a regex.
272 272 pub fn build_single_regex(
273 273 entry: &IgnorePattern,
274 ) -> Result<Vec<u8>, PatternError> {
274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 275 let IgnorePattern {
276 276 pattern, syntax, ..
277 277 } = entry;
278 278 let pattern = match syntax {
279 279 PatternSyntax::RootGlob
280 280 | PatternSyntax::Path
281 281 | PatternSyntax::RelGlob
282 282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 283 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 284 return Err(PatternError::NonRegexPattern(entry.clone()))
285 285 }
286 286 _ => pattern.to_owned(),
287 287 };
288 288 if *syntax == PatternSyntax::RootGlob
289 289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 290 {
291 // The `regex` crate adds `.*` to the start and end of expressions
292 // if there are no anchors, so add the start anchor.
293 let mut escaped = vec![b'^'];
294 escaped.extend(escape_pattern(&pattern));
295 escaped.extend(GLOB_SUFFIX);
296 Ok(escaped)
291 Ok(None)
297 292 } else {
298 293 let mut entry = entry.clone();
299 294 entry.pattern = pattern;
300 Ok(_build_single_regex(&entry))
295 Ok(Some(_build_single_regex(&entry)))
301 296 }
302 297 }
303 298
304 299 lazy_static! {
305 300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
306 301 let mut m = FastHashMap::default();
307 302
308 303 m.insert(b"re".as_ref(), b"relre:".as_ref());
309 304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
310 305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
311 306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
312 307 m.insert(b"include".as_ref(), b"include:".as_ref());
313 308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
314 309 m
315 310 };
316 311 }
317 312
/// Non-fatal problems found while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path
    NoSuchFile(PathBuf),
}
325 320
326 321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
327 322 lines: &[u8],
328 323 file_path: P,
329 324 warn: bool,
330 325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
331 326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
332 327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
333 328 let mut inputs: Vec<IgnorePattern> = vec![];
334 329 let mut warnings: Vec<PatternFileWarning> = vec![];
335 330
336 331 let mut current_syntax = b"relre:".as_ref();
337 332
338 333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
339 334 let line_number = line_number + 1;
340 335
341 336 let line_buf;
342 337 if line.contains(&b'#') {
343 338 if let Some(cap) = comment_regex.captures(line) {
344 339 line = &line[..cap.get(1).unwrap().end()]
345 340 }
346 341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
347 342 line = &line_buf;
348 343 }
349 344
350 345 let mut line = line.trim_end();
351 346
352 347 if line.is_empty() {
353 348 continue;
354 349 }
355 350
356 351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
357 352 let syntax = syntax.trim();
358 353
359 354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
360 355 current_syntax = rel_syntax;
361 356 } else if warn {
362 357 warnings.push(PatternFileWarning::InvalidSyntax(
363 358 file_path.as_ref().to_owned(),
364 359 syntax.to_owned(),
365 360 ));
366 361 }
367 362 continue;
368 363 }
369 364
370 365 let mut line_syntax: &[u8] = &current_syntax;
371 366
372 367 for (s, rels) in SYNTAXES.iter() {
373 368 if let Some(rest) = line.drop_prefix(rels) {
374 369 line_syntax = rels;
375 370 line = rest;
376 371 break;
377 372 }
378 373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
379 374 line_syntax = rels;
380 375 line = rest;
381 376 break;
382 377 }
383 378 }
384 379
385 380 inputs.push(IgnorePattern::new(
386 381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
387 382 PatternError::UnsupportedSyntax(syntax) => {
388 383 PatternError::UnsupportedSyntaxInFile(
389 384 syntax,
390 385 file_path.as_ref().to_string_lossy().into(),
391 386 line_number,
392 387 )
393 388 }
394 389 _ => e,
395 390 })?,
396 391 &line,
397 392 &file_path,
398 393 ));
399 394 }
400 395 Ok((inputs, warnings))
401 396 }
402 397
403 398 pub fn read_pattern_file<P: AsRef<Path>>(
404 399 file_path: P,
405 400 warn: bool,
406 401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
407 402 let mut f = match File::open(file_path.as_ref()) {
408 403 Ok(f) => Ok(f),
409 404 Err(e) => match e.kind() {
410 405 std::io::ErrorKind::NotFound => {
411 406 return Ok((
412 407 vec![],
413 408 vec![PatternFileWarning::NoSuchFile(
414 409 file_path.as_ref().to_owned(),
415 410 )],
416 411 ))
417 412 }
418 413 _ => Err(e),
419 414 },
420 415 }?;
421 416 let mut contents = Vec::new();
422 417
423 418 f.read_to_end(&mut contents)?;
424 419
425 420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
426 421 }
427 422
428 423 /// Represents an entry in an "ignore" file.
429 424 #[derive(Debug, Eq, PartialEq, Clone)]
430 425 pub struct IgnorePattern {
431 426 pub syntax: PatternSyntax,
432 427 pub pattern: Vec<u8>,
433 428 pub source: PathBuf,
434 429 }
435 430
436 431 impl IgnorePattern {
437 432 pub fn new(
438 433 syntax: PatternSyntax,
439 434 pattern: &[u8],
440 435 source: impl AsRef<Path>,
441 436 ) -> Self {
442 437 Self {
443 438 syntax,
444 439 pattern: pattern.to_owned(),
445 440 source: source.as_ref().to_owned(),
446 441 }
447 442 }
448 443 }
449 444
450 445 pub type PatternResult<T> = Result<T, PatternError>;
451 446
452 447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
453 448 /// patterns.
454 449 ///
455 450 /// `subinclude:` is not treated as a special pattern here: unraveling them
456 451 /// needs to occur in the "ignore" phase.
457 452 pub fn get_patterns_from_file(
458 453 pattern_file: impl AsRef<Path>,
459 454 root_dir: impl AsRef<Path>,
460 455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
461 456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
462 457 let patterns = patterns
463 458 .into_iter()
464 459 .flat_map(|entry| -> PatternResult<_> {
465 460 let IgnorePattern {
466 461 syntax,
467 462 pattern,
468 463 source: _,
469 464 } = &entry;
470 465 Ok(match syntax {
471 466 PatternSyntax::Include => {
472 467 let inner_include =
473 468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
474 469 let (inner_pats, inner_warnings) = get_patterns_from_file(
475 470 &inner_include,
476 471 root_dir.as_ref(),
477 472 )?;
478 473 warnings.extend(inner_warnings);
479 474 inner_pats
480 475 }
481 476 _ => vec![entry],
482 477 })
483 478 })
484 479 .flatten()
485 480 .collect();
486 481
487 482 Ok((patterns, warnings))
488 483 }
489 484
490 485 /// Holds all the information needed to handle a `subinclude:` pattern.
491 486 pub struct SubInclude {
492 487 /// Will be used for repository (hg) paths that start with this prefix.
493 488 /// It is relative to the current working directory, so comparing against
494 489 /// repository paths is painless.
495 490 pub prefix: HgPathBuf,
496 491 /// The file itself, containing the patterns
497 492 pub path: PathBuf,
498 493 /// Folder in the filesystem where this it applies
499 494 pub root: PathBuf,
500 495 }
501 496
502 497 impl SubInclude {
503 498 pub fn new(
504 499 root_dir: impl AsRef<Path>,
505 500 pattern: &[u8],
506 501 source: impl AsRef<Path>,
507 502 ) -> Result<SubInclude, HgPathError> {
508 503 let normalized_source =
509 504 normalize_path_bytes(&get_bytes_from_path(source));
510 505
511 506 let source_root = get_path_from_bytes(&normalized_source);
512 507 let source_root = source_root.parent().unwrap_or(source_root.deref());
513 508
514 509 let path = source_root.join(get_path_from_bytes(pattern));
515 510 let new_root = path.parent().unwrap_or(path.deref());
516 511
517 512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
518 513
519 514 Ok(Self {
520 515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
521 516 if !p.is_empty() {
522 517 p.push(b'/');
523 518 }
524 519 Ok(p)
525 520 })?,
526 521 path: path.to_owned(),
527 522 root: new_root.to_owned(),
528 523 })
529 524 }
530 525 }
531 526
532 527 /// Separate and pre-process subincludes from other patterns for the "ignore"
533 528 /// phase.
534 529 pub fn filter_subincludes(
535 530 ignore_patterns: &[IgnorePattern],
536 531 root_dir: impl AsRef<Path>,
537 532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
538 533 let mut subincludes = vec![];
539 534 let mut others = vec![];
540 535
541 536 for ignore_pattern in ignore_patterns.iter() {
542 537 let IgnorePattern {
543 538 syntax,
544 539 pattern,
545 540 source,
546 541 } = ignore_pattern;
547 542 if *syntax == PatternSyntax::SubInclude {
548 543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
549 544 } else {
550 545 others.push(ignore_pattern)
551 546 }
552 547 }
553 548 Ok((subincludes, others))
554 549 }
555 550
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![]
        );
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"rust/target/",
                Path::new("")
            ))
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // Exact rootglobs are not turned into a regex
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"whatever",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        // A rootglob with a special character still needs a regex
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"*.o",
                Path::new("")
            ))
            .unwrap(),
            Some(br"^[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}
@@ -1,930 +1,948 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 #[cfg(feature = "with-re2")]
11 11 use crate::re2::Re2;
12 12 use crate::{
13 13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 14 filepatterns::{
15 15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 16 PatternFileWarning, PatternResult, SubInclude,
17 17 },
18 18 utils::{
19 19 files::find_dirs,
20 20 hg_path::{HgPath, HgPathBuf},
21 21 Escaped,
22 22 },
23 23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 24 PatternSyntax,
25 25 };
26 26
27 use crate::filepatterns::normalize_path_bytes;
27 28 use std::borrow::ToOwned;
28 29 use std::collections::HashSet;
29 30 use std::fmt::{Display, Error, Formatter};
30 31 use std::iter::FromIterator;
31 32 use std::ops::Deref;
32 33 use std::path::{Path, PathBuf};
33 34
34 35 use micro_timer::timed;
35 36
36 37 #[derive(Debug, PartialEq)]
37 38 pub enum VisitChildrenSet<'a> {
38 39 /// Don't visit anything
39 40 Empty,
40 41 /// Only visit this directory
41 42 This,
42 43 /// Visit this directory and these subdirectories
43 44 /// TODO Should we implement a `NonEmptyHashSet`?
44 45 Set(HashSet<&'a HgPath>),
45 46 /// Visit this directory and all subdirectories
46 47 Recursive,
47 48 }
48 49
49 50 pub trait Matcher {
50 51 /// Explicitly listed files
51 52 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
52 53 /// Returns whether `filename` is in `file_set`
53 54 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
54 55 /// Returns whether `filename` is matched by this matcher
55 56 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
56 57 /// Decides whether a directory should be visited based on whether it
57 58 /// has potential matches in it or one of its subdirectories, and
58 59 /// potentially lists which subdirectories of that directory should be
59 60 /// visited. This is based on the match's primary, included, and excluded
60 61 /// patterns.
61 62 ///
62 63 /// # Example
63 64 ///
64 65 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
65 66 /// return the following values (assuming the implementation of
66 67 /// visit_children_set is capable of recognizing this; some implementations
67 68 /// are not).
68 69 ///
69 70 /// ```text
70 71 /// ```ignore
71 72 /// '' -> {'foo', 'qux'}
72 73 /// 'baz' -> set()
73 74 /// 'foo' -> {'bar'}
74 75 /// // Ideally this would be `Recursive`, but since the prefix nature of
75 76 /// // matchers is applied to the entire matcher, we have to downgrade this
76 77 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
77 78 /// // `RootFilesIn'-kind matcher being mixed in.
78 79 /// 'foo/bar' -> 'this'
79 80 /// 'qux' -> 'this'
80 81 /// ```
81 82 /// # Important
82 83 ///
83 84 /// Most matchers do not know if they're representing files or
84 85 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
85 86 /// file or a directory, so `visit_children_set('dir')` for most matchers
86 87 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
87 88 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
88 89 /// it may return `VisitChildrenSet::This`.
89 90 /// Do not rely on the return being a `HashSet` indicating that there are
90 91 /// no files in this dir to investigate (or equivalently that if there are
91 92 /// files to investigate in 'dir' that it will always return
92 93 /// `VisitChildrenSet::This`).
93 94 fn visit_children_set(
94 95 &self,
95 96 directory: impl AsRef<HgPath>,
96 97 ) -> VisitChildrenSet;
97 98 /// Matcher will match everything and `files_set()` will be empty:
98 99 /// optimization might be possible.
99 100 fn matches_everything(&self) -> bool;
100 101 /// Matcher will match exactly the files in `files_set()`: optimization
101 102 /// might be possible.
102 103 fn is_exact(&self) -> bool;
103 104 }
104 105
105 106 /// Matches everything.
106 107 ///```
107 108 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
108 109 ///
109 110 /// let matcher = AlwaysMatcher;
110 111 ///
111 112 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
112 113 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
113 114 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
114 115 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
115 116 /// ```
116 117 #[derive(Debug)]
117 118 pub struct AlwaysMatcher;
118 119
119 120 impl Matcher for AlwaysMatcher {
120 121 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
121 122 None
122 123 }
123 124 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
124 125 false
125 126 }
126 127 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
127 128 true
128 129 }
129 130 fn visit_children_set(
130 131 &self,
131 132 _directory: impl AsRef<HgPath>,
132 133 ) -> VisitChildrenSet {
133 134 VisitChildrenSet::Recursive
134 135 }
135 136 fn matches_everything(&self) -> bool {
136 137 true
137 138 }
138 139 fn is_exact(&self) -> bool {
139 140 false
140 141 }
141 142 }
142 143
143 144 /// Matches the input files exactly. They are interpreted as paths, not
144 145 /// patterns.
145 146 ///
146 147 ///```
147 148 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
148 149 ///
149 150 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
150 151 /// let matcher = FileMatcher::new(&files).unwrap();
151 152 ///
152 153 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
153 154 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
154 155 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
155 156 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
156 157 /// ```
157 158 #[derive(Debug)]
158 159 pub struct FileMatcher<'a> {
159 160 files: HashSet<&'a HgPath>,
160 161 dirs: DirsMultiset,
161 162 }
162 163
163 164 impl<'a> FileMatcher<'a> {
164 165 pub fn new(
165 166 files: &'a [impl AsRef<HgPath>],
166 167 ) -> Result<Self, DirstateMapError> {
167 168 Ok(Self {
168 169 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
169 170 dirs: DirsMultiset::from_manifest(files)?,
170 171 })
171 172 }
172 173 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
173 174 self.files.contains(filename.as_ref())
174 175 }
175 176 }
176 177
177 178 impl<'a> Matcher for FileMatcher<'a> {
178 179 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
179 180 Some(&self.files)
180 181 }
181 182 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
182 183 self.inner_matches(filename)
183 184 }
184 185 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
185 186 self.inner_matches(filename)
186 187 }
187 188 fn visit_children_set(
188 189 &self,
189 190 directory: impl AsRef<HgPath>,
190 191 ) -> VisitChildrenSet {
191 192 if self.files.is_empty() || !self.dirs.contains(&directory) {
192 193 return VisitChildrenSet::Empty;
193 194 }
194 195 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
195 196
196 197 let mut candidates: HashSet<&HgPath> =
197 198 self.files.union(&dirs_as_set).map(|k| *k).collect();
198 199 candidates.remove(HgPath::new(b""));
199 200
200 201 if !directory.as_ref().is_empty() {
201 202 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
202 203 candidates = candidates
203 204 .iter()
204 205 .filter_map(|c| {
205 206 if c.as_bytes().starts_with(&directory) {
206 207 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
207 208 } else {
208 209 None
209 210 }
210 211 })
211 212 .collect();
212 213 }
213 214
214 215 // `self.dirs` includes all of the directories, recursively, so if
215 216 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
216 217 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
217 218 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
218 219 // subdir will be in there without a slash.
219 220 VisitChildrenSet::Set(
220 221 candidates
221 222 .iter()
222 223 .filter_map(|c| {
223 224 if c.bytes().all(|b| *b != b'/') {
224 225 Some(*c)
225 226 } else {
226 227 None
227 228 }
228 229 })
229 230 .collect(),
230 231 )
231 232 }
232 233 fn matches_everything(&self) -> bool {
233 234 false
234 235 }
235 236 fn is_exact(&self) -> bool {
236 237 true
237 238 }
238 239 }
239 240
240 241 /// Matches files that are included in the ignore rules.
241 242 #[cfg_attr(
242 243 feature = "with-re2",
243 244 doc = r##"
244 245 ```
245 246 use hg::{
246 247 matchers::{IncludeMatcher, Matcher},
247 248 IgnorePattern,
248 249 PatternSyntax,
249 250 utils::hg_path::HgPath
250 251 };
251 252 use std::path::Path;
252 253 ///
253 254 let ignore_patterns =
254 255 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
255 256 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
256 257 ///
257 258 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
258 259 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
259 260 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
260 261 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
261 262 ```
262 263 "##
263 264 )]
264 265 pub struct IncludeMatcher<'a> {
265 266 patterns: Vec<u8>,
266 267 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
267 268 /// Whether all the patterns match a prefix (i.e. recursively)
268 269 prefix: bool,
269 270 roots: HashSet<HgPathBuf>,
270 271 dirs: HashSet<HgPathBuf>,
271 272 parents: HashSet<HgPathBuf>,
272 273 }
273 274
274 275 impl<'a> Matcher for IncludeMatcher<'a> {
275 276 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
276 277 None
277 278 }
278 279
279 280 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
280 281 false
281 282 }
282 283
283 284 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
284 285 (self.match_fn)(filename.as_ref())
285 286 }
286 287
287 288 fn visit_children_set(
288 289 &self,
289 290 directory: impl AsRef<HgPath>,
290 291 ) -> VisitChildrenSet {
291 292 let dir = directory.as_ref();
292 293 if self.prefix && self.roots.contains(dir) {
293 294 return VisitChildrenSet::Recursive;
294 295 }
295 296 if self.roots.contains(HgPath::new(b""))
296 297 || self.roots.contains(dir)
297 298 || self.dirs.contains(dir)
298 299 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
299 300 {
300 301 return VisitChildrenSet::This;
301 302 }
302 303
303 304 if self.parents.contains(directory.as_ref()) {
304 305 let multiset = self.get_all_parents_children();
305 306 if let Some(children) = multiset.get(dir) {
306 307 return VisitChildrenSet::Set(children.to_owned());
307 308 }
308 309 }
309 310 VisitChildrenSet::Empty
310 311 }
311 312
312 313 fn matches_everything(&self) -> bool {
313 314 false
314 315 }
315 316
316 317 fn is_exact(&self) -> bool {
317 318 false
318 319 }
319 320 }
320 321
#[cfg(feature = "with-re2")]
/// Builds a predicate that tests an `HgPath` against the regex `pattern`,
/// using the `Re2` engine.
///
/// Fails with `PatternError::UnsupportedSyntax` when `Re2` rejects the
/// pattern, i.e. when it is invalid or uses something the engine does not
/// support, for instance back-references.
#[timed]
fn re_matcher(
    pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
    let regex = Re2::new(pattern).map_err(PatternError::UnsupportedSyntax)?;
    Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
}
335 336
336 337 #[cfg(not(feature = "with-re2"))]
337 338 /// Returns a function that matches an `HgPath` against the given regex
338 339 /// pattern.
339 340 ///
340 341 /// This can fail when the pattern is invalid or not supported by the
341 342 /// underlying engine (the `regex` crate), for instance anything with
342 343 /// back-references.
343 344 #[timed]
344 345 fn re_matcher(
345 346 pattern: &[u8],
346 347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
347 348 use std::io::Write;
348 349
349 350 let mut escaped_bytes = vec![];
350 351 for byte in pattern {
351 352 if *byte > 127 {
352 353 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
353 354 } else {
354 355 escaped_bytes.push(*byte);
355 356 }
356 357 }
357 358
358 359 // Avoid the cost of UTF8 checking
359 360 //
360 361 // # Safety
361 362 // This is safe because we escaped all non-ASCII bytes.
362 363 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
363 364 let re = regex::bytes::RegexBuilder::new(&pattern_string)
364 365 .unicode(false)
365 366 .build()
366 367 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
367 368
368 369 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
369 370 }
370 371
371 372 /// Returns the regex pattern and a function that matches an `HgPath` against
372 373 /// said regex formed by the given ignore patterns.
373 374 fn build_regex_match<'a>(
374 375 ignore_patterns: &'a [&'a IgnorePattern],
375 376 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
376 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
377 .into_iter()
378 .map(|k| build_single_regex(*k))
379 .collect();
380 let regexps = regexps?;
377 let mut regexps = vec![];
378 let mut exact_set = HashSet::new();
379
380 for pattern in ignore_patterns {
381 if let Some(re) = build_single_regex(pattern)? {
382 regexps.push(re);
383 } else {
384 let exact = normalize_path_bytes(&pattern.pattern);
385 exact_set.insert(HgPathBuf::from_bytes(&exact));
386 }
387 }
388
381 389 let full_regex = regexps.join(&b'|');
382 390
383 let matcher = re_matcher(&full_regex)?;
384 let func = Box::new(move |filename: &HgPath| matcher(filename));
391 // An empty pattern would cause the regex engine to incorrectly match the
392 // (empty) root directory
393 let func = if !(regexps.is_empty()) {
394 let matcher = re_matcher(&full_regex)?;
395 let func = move |filename: &HgPath| {
396 exact_set.contains(filename) || matcher(filename)
397 };
398 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
399 } else {
400 let func = move |filename: &HgPath| exact_set.contains(filename);
401 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
402 };
385 403
386 404 Ok((full_regex, func))
387 405 }
388 406
389 407 /// Returns roots and directories corresponding to each pattern.
390 408 ///
391 409 /// This calculates the roots and directories exactly matching the patterns and
392 410 /// returns a tuple of (roots, dirs). It does not return other directories
393 411 /// which may also need to be considered, like the parent directories.
394 412 fn roots_and_dirs(
395 413 ignore_patterns: &[IgnorePattern],
396 414 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
397 415 let mut roots = Vec::new();
398 416 let mut dirs = Vec::new();
399 417
400 418 for ignore_pattern in ignore_patterns {
401 419 let IgnorePattern {
402 420 syntax, pattern, ..
403 421 } = ignore_pattern;
404 422 match syntax {
405 423 PatternSyntax::RootGlob | PatternSyntax::Glob => {
406 424 let mut root = vec![];
407 425
408 426 for p in pattern.split(|c| *c == b'/') {
409 427 if p.iter().any(|c| match *c {
410 428 b'[' | b'{' | b'*' | b'?' => true,
411 429 _ => false,
412 430 }) {
413 431 break;
414 432 }
415 433 root.push(HgPathBuf::from_bytes(p));
416 434 }
417 435 let buf =
418 436 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
419 437 roots.push(buf);
420 438 }
421 439 PatternSyntax::Path | PatternSyntax::RelPath => {
422 440 let pat = HgPath::new(if pattern == b"." {
423 441 &[] as &[u8]
424 442 } else {
425 443 pattern
426 444 });
427 445 roots.push(pat.to_owned());
428 446 }
429 447 PatternSyntax::RootFiles => {
430 448 let pat = if pattern == b"." {
431 449 &[] as &[u8]
432 450 } else {
433 451 pattern
434 452 };
435 453 dirs.push(HgPathBuf::from_bytes(pat));
436 454 }
437 455 _ => {
438 456 roots.push(HgPathBuf::new());
439 457 }
440 458 }
441 459 }
442 460 (roots, dirs)
443 461 }
444 462
445 463 /// Paths extracted from patterns
446 464 #[derive(Debug, PartialEq)]
447 465 struct RootsDirsAndParents {
448 466 /// Directories to match recursively
449 467 pub roots: HashSet<HgPathBuf>,
450 468 /// Directories to match non-recursively
451 469 pub dirs: HashSet<HgPathBuf>,
452 470 /// Implicitly required directories to go to items in either roots or dirs
453 471 pub parents: HashSet<HgPathBuf>,
454 472 }
455 473
456 474 /// Extract roots, dirs and parents from patterns.
457 475 fn roots_dirs_and_parents(
458 476 ignore_patterns: &[IgnorePattern],
459 477 ) -> PatternResult<RootsDirsAndParents> {
460 478 let (roots, dirs) = roots_and_dirs(ignore_patterns);
461 479
462 480 let mut parents = HashSet::new();
463 481
464 482 parents.extend(
465 483 DirsMultiset::from_manifest(&dirs)
466 484 .map_err(|e| match e {
467 485 DirstateMapError::InvalidPath(e) => e,
468 486 _ => unreachable!(),
469 487 })?
470 488 .iter()
471 489 .map(|k| k.to_owned()),
472 490 );
473 491 parents.extend(
474 492 DirsMultiset::from_manifest(&roots)
475 493 .map_err(|e| match e {
476 494 DirstateMapError::InvalidPath(e) => e,
477 495 _ => unreachable!(),
478 496 })?
479 497 .iter()
480 498 .map(|k| k.to_owned()),
481 499 );
482 500
483 501 Ok(RootsDirsAndParents {
484 502 roots: HashSet::from_iter(roots),
485 503 dirs: HashSet::from_iter(dirs),
486 504 parents,
487 505 })
488 506 }
489 507
490 508 /// Returns a function that checks whether a given file (in the general sense)
491 509 /// should be matched.
492 510 fn build_match<'a, 'b>(
493 511 ignore_patterns: &'a [IgnorePattern],
494 512 root_dir: impl AsRef<Path>,
495 513 ) -> PatternResult<(
496 514 Vec<u8>,
497 515 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
498 516 Vec<PatternFileWarning>,
499 517 )> {
500 518 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
501 519 // For debugging and printing
502 520 let mut patterns = vec![];
503 521 let mut all_warnings = vec![];
504 522
505 523 let (subincludes, ignore_patterns) =
506 524 filter_subincludes(ignore_patterns, root_dir)?;
507 525
508 526 if !subincludes.is_empty() {
509 527 // Build prefix-based matcher functions for subincludes
510 528 let mut submatchers = FastHashMap::default();
511 529 let mut prefixes = vec![];
512 530
513 531 for SubInclude { prefix, root, path } in subincludes.into_iter() {
514 532 let (match_fn, warnings) =
515 533 get_ignore_function(vec![path.to_path_buf()], root)?;
516 534 all_warnings.extend(warnings);
517 535 prefixes.push(prefix.to_owned());
518 536 submatchers.insert(prefix.to_owned(), match_fn);
519 537 }
520 538
521 539 let match_subinclude = move |filename: &HgPath| {
522 540 for prefix in prefixes.iter() {
523 541 if let Some(rel) = filename.relative_to(prefix) {
524 542 if (submatchers.get(prefix).unwrap())(rel) {
525 543 return true;
526 544 }
527 545 }
528 546 }
529 547 false
530 548 };
531 549
532 550 match_funcs.push(Box::new(match_subinclude));
533 551 }
534 552
535 553 if !ignore_patterns.is_empty() {
536 554 // Either do dumb matching if all patterns are rootfiles, or match
537 555 // with a regex.
538 556 if ignore_patterns
539 557 .iter()
540 558 .all(|k| k.syntax == PatternSyntax::RootFiles)
541 559 {
542 560 let dirs: HashSet<_> = ignore_patterns
543 561 .iter()
544 562 .map(|k| k.pattern.to_owned())
545 563 .collect();
546 564 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
547 565
548 566 let match_func = move |path: &HgPath| -> bool {
549 567 let path = path.as_bytes();
550 568 let i = path.iter().rfind(|a| **a == b'/');
551 569 let dir = if let Some(i) = i {
552 570 &path[..*i as usize]
553 571 } else {
554 572 b"."
555 573 };
556 574 dirs.contains(dir.deref())
557 575 };
558 576 match_funcs.push(Box::new(match_func));
559 577
560 578 patterns.extend(b"rootfilesin: ");
561 579 dirs_vec.sort();
562 580 patterns.extend(dirs_vec.escaped_bytes());
563 581 } else {
564 582 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
565 583 patterns = new_re;
566 584 match_funcs.push(match_func)
567 585 }
568 586 }
569 587
570 588 Ok(if match_funcs.len() == 1 {
571 589 (patterns, match_funcs.remove(0), all_warnings)
572 590 } else {
573 591 (
574 592 patterns,
575 593 Box::new(move |f: &HgPath| -> bool {
576 594 match_funcs.iter().any(|match_func| match_func(f))
577 595 }),
578 596 all_warnings,
579 597 )
580 598 })
581 599 }
582 600
583 601 /// Parses all "ignore" files with their recursive includes and returns a
584 602 /// function that checks whether a given file (in the general sense) should be
585 603 /// ignored.
586 604 pub fn get_ignore_function<'a>(
587 605 all_pattern_files: Vec<PathBuf>,
588 606 root_dir: impl AsRef<Path>,
589 607 ) -> PatternResult<(
590 608 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
591 609 Vec<PatternFileWarning>,
592 610 )> {
593 611 let mut all_patterns = vec![];
594 612 let mut all_warnings = vec![];
595 613
596 614 for pattern_file in all_pattern_files.into_iter() {
597 615 let (patterns, warnings) =
598 616 get_patterns_from_file(pattern_file, &root_dir)?;
599 617
600 618 all_patterns.extend(patterns.to_owned());
601 619 all_warnings.extend(warnings);
602 620 }
603 621 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
604 622 all_warnings.extend(warnings);
605 623 Ok((
606 624 Box::new(move |path: &HgPath| matcher.matches(path)),
607 625 all_warnings,
608 626 ))
609 627 }
610 628
611 629 impl<'a> IncludeMatcher<'a> {
612 630 pub fn new(
613 631 ignore_patterns: Vec<IgnorePattern>,
614 632 root_dir: impl AsRef<Path>,
615 633 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
616 634 let (patterns, match_fn, warnings) =
617 635 build_match(&ignore_patterns, root_dir)?;
618 636 let RootsDirsAndParents {
619 637 roots,
620 638 dirs,
621 639 parents,
622 640 } = roots_dirs_and_parents(&ignore_patterns)?;
623 641
624 642 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
625 643 PatternSyntax::Path | PatternSyntax::RelPath => true,
626 644 _ => false,
627 645 });
628 646
629 647 Ok((
630 648 Self {
631 649 patterns,
632 650 match_fn,
633 651 prefix,
634 652 roots,
635 653 dirs,
636 654 parents,
637 655 },
638 656 warnings,
639 657 ))
640 658 }
641 659
642 660 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
643 661 // TODO cache
644 662 let thing = self
645 663 .dirs
646 664 .iter()
647 665 .chain(self.roots.iter())
648 666 .chain(self.parents.iter());
649 667 DirsChildrenMultiset::new(thing, Some(&self.parents))
650 668 }
651 669 }
652 670
653 671 impl<'a> Display for IncludeMatcher<'a> {
654 672 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
655 673 write!(
656 674 f,
657 675 "IncludeMatcher(includes='{}')",
658 676 String::from_utf8_lossy(&self.patterns.escaped_bytes())
659 677 )
660 678 }
661 679 }
662 680
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Shorthand for a static `HgPath` built from a byte literal.
    fn path(bytes: &'static [u8]) -> &'static HgPath {
        HgPath::new(bytes)
    }

    /// The `VisitChildrenSet::Set` payload made of the given child names.
    fn children(names: &[&'static HgPath]) -> HashSet<&'static HgPath> {
        names.iter().cloned().collect()
    }

    #[test]
    fn test_roots_and_dirs() {
        let pats = vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ];

        let (roots, dirs) = roots_and_dirs(&pats);

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, vec![]);
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let pats = vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ];

        let roots: HashSet<HgPathBuf> =
            vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect();
        let dirs = HashSet::new();
        let parents: HashSet<HgPathBuf> =
            vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect();

        let expected = RootsDirsAndParents {
            roots,
            dirs,
            parents,
        };
        assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![path(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        // Each directory on the way to the file lists the next component
        assert_eq!(
            matcher.visit_children_set(path(b"")),
            VisitChildrenSet::Set(children(&[path(b"dir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir")),
            VisitChildrenSet::Set(children(&[path(b"subdir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir")),
            VisitChildrenSet::Set(children(&[path(b"foo.txt")]))
        );

        // Everything else has nothing to visit
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(path(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            path(b"rootfile.txt"),
            path(b"a/file1.txt"),
            path(b"a/b/file2.txt"),
            // No file in a/b/c
            path(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(path(b"")),
            VisitChildrenSet::Set(children(&[
                path(b"a"),
                path(b"rootfile.txt")
            ]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"a")),
            VisitChildrenSet::Set(children(&[
                path(b"b"),
                path(b"file1.txt")
            ]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"a/b")),
            VisitChildrenSet::Set(children(&[
                path(b"c"),
                path(b"file2.txt")
            ]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"a/b/c")),
            VisitChildrenSet::Set(children(&[path(b"d")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"a/b/c/d")),
            VisitChildrenSet::Set(children(&[path(b"file4.txt")]))
        );

        assert_eq!(
            matcher.visit_children_set(path(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(path(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[cfg(feature = "with-re2")]
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let prefix_pattern = IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) =
            IncludeMatcher::new(vec![prefix_pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(path(b"")),
            VisitChildrenSet::Set(children(&[path(b"dir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir")),
            VisitChildrenSet::Set(children(&[path(b"subdir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(path(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let rootfiles_pattern = IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) =
            IncludeMatcher::new(vec![rootfiles_pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(path(b"")),
            VisitChildrenSet::Set(children(&[path(b"dir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir")),
            VisitChildrenSet::Set(children(&[path(b"subdir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(path(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let glob_pattern = IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir/z*",
            Path::new(""),
        );
        let (matcher, _) =
            IncludeMatcher::new(vec![glob_pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(path(b"")),
            VisitChildrenSet::Set(children(&[path(b"dir")]))
        );
        assert_eq!(
            matcher.visit_children_set(path(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(path(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now