##// END OF EJS Templates
rust-matchers: use the `regex` crate...
Raphaël Gomès -
r45084:496868f1 default
parent child Browse files
Show More
@@ -1,656 +1,665 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::fs::File;
21 21 use std::io::Read;
22 22 use std::ops::Deref;
23 23 use std::path::{Path, PathBuf};
24 24 use std::vec::Vec;
25 25
26 26 lazy_static! {
27 27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 30 for byte in to_escape {
31 31 v[*byte as usize].insert(0, b'\\');
32 32 }
33 33 v
34 34 };
35 35 }
36 36
37 37 /// These are matched in order
38 38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
39 39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40 40
41 41 /// Appended to the regexp of globs
42 42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43 43
44 44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
45 45 pub enum PatternSyntax {
46 46 /// A regular expression
47 47 Regexp,
48 48 /// Glob that matches at the front of the path
49 49 RootGlob,
50 50 /// Glob that matches at any suffix of the path (still anchored at
51 51 /// slashes)
52 52 Glob,
53 53 /// a path relative to repository root, which is matched recursively
54 54 Path,
55 55 /// A path relative to cwd
56 56 RelPath,
57 57 /// an unrooted glob (*.rs matches Rust files in all dirs)
58 58 RelGlob,
59 59 /// A regexp that needn't match the start of a name
60 60 RelRegexp,
61 61 /// A path relative to repository root, which is matched non-recursively
62 62 /// (will not match subdirectories)
63 63 RootFiles,
64 64 /// A file of patterns to read and include
65 65 Include,
66 66 /// A file of patterns to match against files under the same directory
67 67 SubInclude,
68 68 }
69 69
70 70 /// Transforms a glob pattern into a regex
71 71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 72 let mut input = pat;
73 73 let mut res: Vec<u8> = vec![];
74 74 let mut group_depth = 0;
75 75
76 76 while let Some((c, rest)) = input.split_first() {
77 77 input = rest;
78 78
79 79 match c {
80 80 b'*' => {
81 81 for (source, repl) in GLOB_REPLACEMENTS {
82 82 if let Some(rest) = input.drop_prefix(source) {
83 83 input = rest;
84 84 res.extend(*repl);
85 85 break;
86 86 }
87 87 }
88 88 }
89 89 b'?' => res.extend(b"."),
90 90 b'[' => {
91 91 match input.iter().skip(1).position(|b| *b == b']') {
92 92 None => res.extend(b"\\["),
93 93 Some(end) => {
94 94 // Account for the one we skipped
95 95 let end = end + 1;
96 96
97 97 res.extend(b"[");
98 98
99 99 for (i, b) in input[..end].iter().enumerate() {
100 100 if *b == b'!' && i == 0 {
101 101 res.extend(b"^")
102 102 } else if *b == b'^' && i == 0 {
103 103 res.extend(b"\\^")
104 104 } else if *b == b'\\' {
105 105 res.extend(b"\\\\")
106 106 } else {
107 107 res.push(*b)
108 108 }
109 109 }
110 110 res.extend(b"]");
111 111 input = &input[end + 1..];
112 112 }
113 113 }
114 114 }
115 115 b'{' => {
116 116 group_depth += 1;
117 117 res.extend(b"(?:")
118 118 }
119 119 b'}' if group_depth > 0 => {
120 120 group_depth -= 1;
121 121 res.extend(b")");
122 122 }
123 123 b',' if group_depth > 0 => res.extend(b"|"),
124 124 b'\\' => {
125 125 let c = {
126 126 if let Some((c, rest)) = input.split_first() {
127 127 input = rest;
128 128 c
129 129 } else {
130 130 c
131 131 }
132 132 };
133 133 res.extend(&RE_ESCAPE[*c as usize])
134 134 }
135 135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 136 }
137 137 }
138 138 res
139 139 }
140 140
141 141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 142 pattern
143 143 .iter()
144 144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 145 .collect()
146 146 }
147 147
148 148 pub fn parse_pattern_syntax(
149 149 kind: &[u8],
150 150 ) -> Result<PatternSyntax, PatternError> {
151 151 match kind {
152 152 b"re:" => Ok(PatternSyntax::Regexp),
153 153 b"path:" => Ok(PatternSyntax::Path),
154 154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 158 b"glob:" => Ok(PatternSyntax::Glob),
159 159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 160 b"include:" => Ok(PatternSyntax::Include),
161 161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 162 _ => Err(PatternError::UnsupportedSyntax(
163 163 String::from_utf8_lossy(kind).to_string(),
164 164 )),
165 165 }
166 166 }
167 167
168 168 /// Builds the regex that corresponds to the given pattern.
169 169 /// If within a `syntax: regexp` context, returns the pattern,
170 170 /// otherwise, returns the corresponding regex.
171 171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 172 let IgnorePattern {
173 173 syntax, pattern, ..
174 174 } = entry;
175 175 if pattern.is_empty() {
176 176 return vec![];
177 177 }
178 178 match syntax {
179 PatternSyntax::Regexp => pattern.to_owned(),
179 // The `regex` crate adds `.*` to the start and end of expressions
180 // if there are no anchors, so add them.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
180 182 PatternSyntax::RelRegexp => {
181 if pattern[0] == b'^' {
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error
185 // in all engines.
186 if pattern[0] == b'^' || pattern[0] == b'*' {
182 187 return pattern.to_owned();
183 188 }
184 189 [&b".*"[..], pattern].concat()
185 190 }
186 191 PatternSyntax::Path | PatternSyntax::RelPath => {
187 192 if pattern == b"." {
188 193 return vec![];
189 194 }
190 195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
191 196 }
192 197 PatternSyntax::RootFiles => {
193 198 let mut res = if pattern == b"." {
194 vec![]
199 vec![b'^']
195 200 } else {
196 201 // Pattern is a directory name.
197 [escape_pattern(pattern).as_slice(), b"/"].concat()
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
198 203 };
199 204
200 205 // Anything after the pattern must be a non-directory.
201 206 res.extend(b"[^/]+$");
207 res.push(b'$');
202 208 res
203 209 }
204 210 PatternSyntax::RelGlob => {
205 211 let glob_re = glob_to_re(pattern);
206 212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
207 213 [b".*", rest, GLOB_SUFFIX].concat()
208 214 } else {
209 [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
210 216 }
211 217 }
212 218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
213 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
214 220 }
215 221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
216 222 }
217 223 }
218 224
219 225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
220 226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
221 227
222 228 /// TODO support other platforms
223 229 #[cfg(unix)]
224 230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
225 231 if bytes.is_empty() {
226 232 return b".".to_vec();
227 233 }
228 234 let sep = b'/';
229 235
230 236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
231 237 if initial_slashes > 2 {
232 238 // POSIX allows one or two initial slashes, but treats three or more
233 239 // as single slash.
234 240 initial_slashes = 1;
235 241 }
236 242 let components = bytes
237 243 .split(|b| *b == sep)
238 244 .filter(|c| !(c.is_empty() || c == b"."))
239 245 .fold(vec![], |mut acc, component| {
240 246 if component != b".."
241 247 || (initial_slashes == 0 && acc.is_empty())
242 248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
243 249 {
244 250 acc.push(component)
245 251 } else if !acc.is_empty() {
246 252 acc.pop();
247 253 }
248 254 acc
249 255 });
250 256 let mut new_bytes = components.join(&sep);
251 257
252 258 if initial_slashes > 0 {
253 259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
254 260 buf.extend(new_bytes);
255 261 new_bytes = buf;
256 262 }
257 263 if new_bytes.is_empty() {
258 264 b".".to_vec()
259 265 } else {
260 266 new_bytes
261 267 }
262 268 }
263 269
264 270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
265 271 /// that don't need to be transformed into a regex.
266 272 pub fn build_single_regex(
267 273 entry: &IgnorePattern,
268 274 ) -> Result<Vec<u8>, PatternError> {
269 275 let IgnorePattern {
270 276 pattern, syntax, ..
271 277 } = entry;
272 278 let pattern = match syntax {
273 279 PatternSyntax::RootGlob
274 280 | PatternSyntax::Path
275 281 | PatternSyntax::RelGlob
276 282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
277 283 PatternSyntax::Include | PatternSyntax::SubInclude => {
278 284 return Err(PatternError::NonRegexPattern(entry.clone()))
279 285 }
280 286 _ => pattern.to_owned(),
281 287 };
282 288 if *syntax == PatternSyntax::RootGlob
283 289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
284 290 {
285 let mut escaped = escape_pattern(&pattern);
291 // The `regex` crate adds `.*` to the start and end of expressions
292 // if there are no anchors, so add the start anchor.
293 let mut escaped = vec![b'^'];
294 escaped.extend(escape_pattern(&pattern));
286 295 escaped.extend(GLOB_SUFFIX);
287 296 Ok(escaped)
288 297 } else {
289 298 let mut entry = entry.clone();
290 299 entry.pattern = pattern;
291 300 Ok(_build_single_regex(&entry))
292 301 }
293 302 }
294 303
295 304 lazy_static! {
296 305 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
297 306 let mut m = FastHashMap::default();
298 307
299 308 m.insert(b"re".as_ref(), b"relre:".as_ref());
300 309 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
301 310 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
302 311 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
303 312 m.insert(b"include".as_ref(), b"include:".as_ref());
304 313 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
305 314 m
306 315 };
307 316 }
308 317
309 318 #[derive(Debug)]
310 319 pub enum PatternFileWarning {
311 320 /// (file path, syntax bytes)
312 321 InvalidSyntax(PathBuf, Vec<u8>),
313 322 /// File path
314 323 NoSuchFile(PathBuf),
315 324 }
316 325
317 326 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
318 327 lines: &[u8],
319 328 file_path: P,
320 329 warn: bool,
321 330 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
322 331 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
323 332 let comment_escape_regex = Regex::new(r"\\#").unwrap();
324 333 let mut inputs: Vec<IgnorePattern> = vec![];
325 334 let mut warnings: Vec<PatternFileWarning> = vec![];
326 335
327 336 let mut current_syntax = b"relre:".as_ref();
328 337
329 338 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
330 339 let line_number = line_number + 1;
331 340
332 341 let line_buf;
333 342 if line.contains(&b'#') {
334 343 if let Some(cap) = comment_regex.captures(line) {
335 344 line = &line[..cap.get(1).unwrap().end()]
336 345 }
337 346 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
338 347 line = &line_buf;
339 348 }
340 349
341 350 let mut line = line.trim_end();
342 351
343 352 if line.is_empty() {
344 353 continue;
345 354 }
346 355
347 356 if let Some(syntax) = line.drop_prefix(b"syntax:") {
348 357 let syntax = syntax.trim();
349 358
350 359 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
351 360 current_syntax = rel_syntax;
352 361 } else if warn {
353 362 warnings.push(PatternFileWarning::InvalidSyntax(
354 363 file_path.as_ref().to_owned(),
355 364 syntax.to_owned(),
356 365 ));
357 366 }
358 367 continue;
359 368 }
360 369
361 370 let mut line_syntax: &[u8] = &current_syntax;
362 371
363 372 for (s, rels) in SYNTAXES.iter() {
364 373 if let Some(rest) = line.drop_prefix(rels) {
365 374 line_syntax = rels;
366 375 line = rest;
367 376 break;
368 377 }
369 378 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
370 379 line_syntax = rels;
371 380 line = rest;
372 381 break;
373 382 }
374 383 }
375 384
376 385 inputs.push(IgnorePattern::new(
377 386 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
378 387 PatternError::UnsupportedSyntax(syntax) => {
379 388 PatternError::UnsupportedSyntaxInFile(
380 389 syntax,
381 390 file_path.as_ref().to_string_lossy().into(),
382 391 line_number,
383 392 )
384 393 }
385 394 _ => e,
386 395 })?,
387 396 &line,
388 397 &file_path,
389 398 ));
390 399 }
391 400 Ok((inputs, warnings))
392 401 }
393 402
394 403 pub fn read_pattern_file<P: AsRef<Path>>(
395 404 file_path: P,
396 405 warn: bool,
397 406 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
398 407 let mut f = match File::open(file_path.as_ref()) {
399 408 Ok(f) => Ok(f),
400 409 Err(e) => match e.kind() {
401 410 std::io::ErrorKind::NotFound => {
402 411 return Ok((
403 412 vec![],
404 413 vec![PatternFileWarning::NoSuchFile(
405 414 file_path.as_ref().to_owned(),
406 415 )],
407 416 ))
408 417 }
409 418 _ => Err(e),
410 419 },
411 420 }?;
412 421 let mut contents = Vec::new();
413 422
414 423 f.read_to_end(&mut contents)?;
415 424
416 425 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
417 426 }
418 427
419 428 /// Represents an entry in an "ignore" file.
420 429 #[derive(Debug, Eq, PartialEq, Clone)]
421 430 pub struct IgnorePattern {
422 431 pub syntax: PatternSyntax,
423 432 pub pattern: Vec<u8>,
424 433 pub source: PathBuf,
425 434 }
426 435
427 436 impl IgnorePattern {
428 437 pub fn new(
429 438 syntax: PatternSyntax,
430 439 pattern: &[u8],
431 440 source: impl AsRef<Path>,
432 441 ) -> Self {
433 442 Self {
434 443 syntax,
435 444 pattern: pattern.to_owned(),
436 445 source: source.as_ref().to_owned(),
437 446 }
438 447 }
439 448 }
440 449
441 450 pub type PatternResult<T> = Result<T, PatternError>;
442 451
443 452 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
444 453 /// patterns.
445 454 ///
446 455 /// `subinclude:` is not treated as a special pattern here: unraveling them
447 456 /// needs to occur in the "ignore" phase.
448 457 pub fn get_patterns_from_file(
449 458 pattern_file: impl AsRef<Path>,
450 459 root_dir: impl AsRef<Path>,
451 460 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
452 461 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
453 462 let patterns = patterns
454 463 .into_iter()
455 464 .flat_map(|entry| -> PatternResult<_> {
456 465 let IgnorePattern {
457 466 syntax,
458 467 pattern,
459 468 source: _,
460 469 } = &entry;
461 470 Ok(match syntax {
462 471 PatternSyntax::Include => {
463 472 let inner_include =
464 473 root_dir.as_ref().join(get_path_from_bytes(&pattern));
465 474 let (inner_pats, inner_warnings) = get_patterns_from_file(
466 475 &inner_include,
467 476 root_dir.as_ref(),
468 477 )?;
469 478 warnings.extend(inner_warnings);
470 479 inner_pats
471 480 }
472 481 _ => vec![entry],
473 482 })
474 483 })
475 484 .flatten()
476 485 .collect();
477 486
478 487 Ok((patterns, warnings))
479 488 }
480 489
481 490 /// Holds all the information needed to handle a `subinclude:` pattern.
482 491 pub struct SubInclude {
483 492 /// Will be used for repository (hg) paths that start with this prefix.
484 493 /// It is relative to the current working directory, so comparing against
485 494 /// repository paths is painless.
486 495 pub prefix: HgPathBuf,
487 496 /// The file itself, containing the patterns
488 497 pub path: PathBuf,
489 498 /// Folder in the filesystem where it applies
490 499 pub root: PathBuf,
491 500 }
492 501
493 502 impl SubInclude {
494 503 pub fn new(
495 504 root_dir: impl AsRef<Path>,
496 505 pattern: &[u8],
497 506 source: impl AsRef<Path>,
498 507 ) -> Result<SubInclude, HgPathError> {
499 508 let normalized_source =
500 509 normalize_path_bytes(&get_bytes_from_path(source));
501 510
502 511 let source_root = get_path_from_bytes(&normalized_source);
503 512 let source_root = source_root.parent().unwrap_or(source_root.deref());
504 513
505 514 let path = source_root.join(get_path_from_bytes(pattern));
506 515 let new_root = path.parent().unwrap_or(path.deref());
507 516
508 517 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
509 518
510 519 Ok(Self {
511 520 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
512 521 if !p.is_empty() {
513 522 p.push(b'/');
514 523 }
515 524 Ok(p)
516 525 })?,
517 526 path: path.to_owned(),
518 527 root: new_root.to_owned(),
519 528 })
520 529 }
521 530 }
522 531
523 532 /// Separate and pre-process subincludes from other patterns for the "ignore"
524 533 /// phase.
525 534 pub fn filter_subincludes(
526 535 ignore_patterns: &[IgnorePattern],
527 536 root_dir: impl AsRef<Path>,
528 537 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
529 538 let mut subincludes = vec![];
530 539 let mut others = vec![];
531 540
532 541 for ignore_pattern in ignore_patterns.iter() {
533 542 let IgnorePattern {
534 543 syntax,
535 544 pattern,
536 545 source,
537 546 } = ignore_pattern;
538 547 if *syntax == PatternSyntax::SubInclude {
539 548 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
540 549 } else {
541 550 others.push(ignore_pattern)
542 551 }
543 552 }
544 553 Ok((subincludes, others))
545 554 }
546 555
547 556 #[cfg(test)]
548 557 mod tests {
549 558 use super::*;
550 559 use pretty_assertions::assert_eq;
551 560
552 561 #[test]
553 562 fn escape_pattern_test() {
554 563 let untouched =
555 564 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
556 565 assert_eq!(escape_pattern(untouched), untouched.to_vec());
557 566 // All escape codes
558 567 assert_eq!(
559 568 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
560 569 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
561 570 .to_vec()
562 571 );
563 572 }
564 573
565 574 #[test]
566 575 fn glob_test() {
567 576 assert_eq!(glob_to_re(br#"?"#), br#"."#);
568 577 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
569 578 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
570 579 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
571 580 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
572 581 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
573 582 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
574 583 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
575 584 }
576 585
577 586 #[test]
578 587 fn test_parse_pattern_file_contents() {
579 588 let lines = b"syntax: glob\n*.elc";
580 589
581 590 assert_eq!(
582 591 parse_pattern_file_contents(lines, Path::new("file_path"), false)
583 592 .unwrap()
584 593 .0,
585 594 vec![IgnorePattern::new(
586 595 PatternSyntax::RelGlob,
587 596 b"*.elc",
588 597 Path::new("file_path")
589 598 )],
590 599 );
591 600
592 601 let lines = b"syntax: include\nsyntax: glob";
593 602
594 603 assert_eq!(
595 604 parse_pattern_file_contents(lines, Path::new("file_path"), false)
596 605 .unwrap()
597 606 .0,
598 607 vec![]
599 608 );
600 609 let lines = b"glob:**.o";
601 610 assert_eq!(
602 611 parse_pattern_file_contents(lines, Path::new("file_path"), false)
603 612 .unwrap()
604 613 .0,
605 614 vec![IgnorePattern::new(
606 615 PatternSyntax::RelGlob,
607 616 b"**.o",
608 617 Path::new("file_path")
609 618 )]
610 619 );
611 620 }
612 621
613 622 #[test]
614 623 fn test_build_single_regex() {
615 624 assert_eq!(
616 625 build_single_regex(&IgnorePattern::new(
617 626 PatternSyntax::RelGlob,
618 627 b"rust/target/",
619 628 Path::new("")
620 629 ))
621 630 .unwrap(),
622 br"(?:|.*/)rust/target(?:/|$)".to_vec(),
631 br"(?:.*/)?rust/target(?:/|$)".to_vec(),
623 632 );
624 633 }
625 634
626 635 #[test]
627 636 fn test_build_single_regex_shortcut() {
628 637 assert_eq!(
629 638 build_single_regex(&IgnorePattern::new(
630 639 PatternSyntax::RootGlob,
631 640 b"",
632 641 Path::new("")
633 642 ))
634 643 .unwrap(),
635 br"\.(?:/|$)".to_vec(),
644 br"^\.(?:/|$)".to_vec(),
636 645 );
637 646 assert_eq!(
638 647 build_single_regex(&IgnorePattern::new(
639 648 PatternSyntax::RootGlob,
640 649 b"whatever",
641 650 Path::new("")
642 651 ))
643 652 .unwrap(),
644 br"whatever(?:/|$)".to_vec(),
653 br"^whatever(?:/|$)".to_vec(),
645 654 );
646 655 assert_eq!(
647 656 build_single_regex(&IgnorePattern::new(
648 657 PatternSyntax::RootGlob,
649 658 b"*.o",
650 659 Path::new("")
651 660 ))
652 661 .unwrap(),
653 br"[^/]*\.o(?:/|$)".to_vec(),
662 br"^[^/]*\.o(?:/|$)".to_vec(),
654 663 );
655 664 }
656 665 }
@@ -1,894 +1,923 b''
1 1 // matchers.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Structs and types for matching files and directories.
9 9
10 10 #[cfg(feature = "with-re2")]
11 11 use crate::re2::Re2;
12 12 use crate::{
13 13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 14 filepatterns::{
15 15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 16 PatternFileWarning, PatternResult, SubInclude,
17 17 },
18 18 utils::{
19 19 files::find_dirs,
20 20 hg_path::{HgPath, HgPathBuf},
21 21 Escaped,
22 22 },
23 23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 24 PatternSyntax,
25 25 };
26 26
27 27 use micro_timer::timed;
28 28 use std::collections::HashSet;
29 29 use std::fmt::{Display, Error, Formatter};
30 30 use std::iter::FromIterator;
31 31 use std::ops::Deref;
32 32 use std::path::Path;
33 33
34 34 #[derive(Debug, PartialEq)]
35 35 pub enum VisitChildrenSet<'a> {
36 36 /// Don't visit anything
37 37 Empty,
38 38 /// Only visit this directory
39 39 This,
40 40 /// Visit this directory and these subdirectories
41 41 /// TODO Should we implement a `NonEmptyHashSet`?
42 42 Set(HashSet<&'a HgPath>),
43 43 /// Visit this directory and all subdirectories
44 44 Recursive,
45 45 }
46 46
47 47 pub trait Matcher {
48 48 /// Explicitly listed files
49 49 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
50 50 /// Returns whether `filename` is in `file_set`
51 51 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
52 52 /// Returns whether `filename` is matched by this matcher
53 53 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
54 54 /// Decides whether a directory should be visited based on whether it
55 55 /// has potential matches in it or one of its subdirectories, and
56 56 /// potentially lists which subdirectories of that directory should be
57 57 /// visited. This is based on the match's primary, included, and excluded
58 58 /// patterns.
59 59 ///
60 60 /// # Example
61 61 ///
62 62 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
63 63 /// return the following values (assuming the implementation of
64 64 /// visit_children_set is capable of recognizing this; some implementations
65 65 /// are not).
66 66 ///
67 67 /// ```text
68 68 /// ```ignore
69 69 /// '' -> {'foo', 'qux'}
70 70 /// 'baz' -> set()
71 71 /// 'foo' -> {'bar'}
72 72 /// // Ideally this would be `Recursive`, but since the prefix nature of
73 73 /// // matchers is applied to the entire matcher, we have to downgrade this
74 74 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
75 75 /// // `RootFilesIn`-kind matcher being mixed in.
76 76 /// 'foo/bar' -> 'this'
77 77 /// 'qux' -> 'this'
78 78 /// ```
79 79 /// # Important
80 80 ///
81 81 /// Most matchers do not know if they're representing files or
82 82 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
83 83 /// file or a directory, so `visit_children_set('dir')` for most matchers
84 84 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
85 85 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
86 86 /// it may return `VisitChildrenSet::This`.
87 87 /// Do not rely on the return being a `HashSet` indicating that there are
88 88 /// no files in this dir to investigate (or equivalently that if there are
89 89 /// files to investigate in 'dir' that it will always return
90 90 /// `VisitChildrenSet::This`).
91 91 fn visit_children_set(
92 92 &self,
93 93 directory: impl AsRef<HgPath>,
94 94 ) -> VisitChildrenSet;
95 95 /// Matcher will match everything and `file_set()` will be empty:
96 96 /// optimization might be possible.
97 97 fn matches_everything(&self) -> bool;
98 98 /// Matcher will match exactly the files in `file_set()`: optimization
99 99 /// might be possible.
100 100 fn is_exact(&self) -> bool;
101 101 }
102 102
103 103 /// Matches everything.
104 104 ///```
105 105 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
106 106 ///
107 107 /// let matcher = AlwaysMatcher;
108 108 ///
109 109 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
110 110 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
111 111 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
112 112 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
113 113 /// ```
114 114 #[derive(Debug)]
115 115 pub struct AlwaysMatcher;
116 116
117 117 impl Matcher for AlwaysMatcher {
118 118 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
119 119 None
120 120 }
121 121 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
122 122 false
123 123 }
124 124 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
125 125 true
126 126 }
127 127 fn visit_children_set(
128 128 &self,
129 129 _directory: impl AsRef<HgPath>,
130 130 ) -> VisitChildrenSet {
131 131 VisitChildrenSet::Recursive
132 132 }
133 133 fn matches_everything(&self) -> bool {
134 134 true
135 135 }
136 136 fn is_exact(&self) -> bool {
137 137 false
138 138 }
139 139 }
140 140
141 141 /// Matches the input files exactly. They are interpreted as paths, not
142 142 /// patterns.
143 143 ///
144 144 ///```
145 145 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
146 146 ///
147 147 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
148 148 /// let matcher = FileMatcher::new(&files).unwrap();
149 149 ///
150 150 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
151 151 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
152 152 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
153 153 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
154 154 /// ```
155 155 #[derive(Debug)]
156 156 pub struct FileMatcher<'a> {
157 157 files: HashSet<&'a HgPath>,
158 158 dirs: DirsMultiset,
159 159 }
160 160
161 161 impl<'a> FileMatcher<'a> {
162 162 pub fn new(
163 163 files: &'a [impl AsRef<HgPath>],
164 164 ) -> Result<Self, DirstateMapError> {
165 165 Ok(Self {
166 166 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
167 167 dirs: DirsMultiset::from_manifest(files)?,
168 168 })
169 169 }
170 170 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
171 171 self.files.contains(filename.as_ref())
172 172 }
173 173 }
174 174
175 175 impl<'a> Matcher for FileMatcher<'a> {
176 176 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
177 177 Some(&self.files)
178 178 }
179 179 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
180 180 self.inner_matches(filename)
181 181 }
182 182 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
183 183 self.inner_matches(filename)
184 184 }
185 185 fn visit_children_set(
186 186 &self,
187 187 directory: impl AsRef<HgPath>,
188 188 ) -> VisitChildrenSet {
189 189 if self.files.is_empty() || !self.dirs.contains(&directory) {
190 190 return VisitChildrenSet::Empty;
191 191 }
192 192 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
193 193
194 194 let mut candidates: HashSet<&HgPath> =
195 195 self.files.union(&dirs_as_set).map(|k| *k).collect();
196 196 candidates.remove(HgPath::new(b""));
197 197
198 198 if !directory.as_ref().is_empty() {
199 199 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
200 200 candidates = candidates
201 201 .iter()
202 202 .filter_map(|c| {
203 203 if c.as_bytes().starts_with(&directory) {
204 204 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
205 205 } else {
206 206 None
207 207 }
208 208 })
209 209 .collect();
210 210 }
211 211
212 212 // `self.dirs` includes all of the directories, recursively, so if
213 213 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
214 214 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
215 215 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
216 216 // subdir will be in there without a slash.
217 217 VisitChildrenSet::Set(
218 218 candidates
219 219 .iter()
220 220 .filter_map(|c| {
221 221 if c.bytes().all(|b| *b != b'/') {
222 222 Some(*c)
223 223 } else {
224 224 None
225 225 }
226 226 })
227 227 .collect(),
228 228 )
229 229 }
230 230 fn matches_everything(&self) -> bool {
231 231 false
232 232 }
233 233 fn is_exact(&self) -> bool {
234 234 true
235 235 }
236 236 }
237 237
238 238 /// Matches files that are included in the ignore rules.
239 239 #[cfg_attr(
240 240 feature = "with-re2",
241 241 doc = r##"
242 242 ```
243 243 use hg::{
244 244 matchers::{IncludeMatcher, Matcher},
245 245 IgnorePattern,
246 246 PatternSyntax,
247 247 utils::hg_path::HgPath
248 248 };
249 249 use std::path::Path;
250 250 ///
251 251 let ignore_patterns =
252 252 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
253 253 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
254 254 ///
255 255 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
256 256 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
257 257 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
258 258 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
259 259 ```
260 260 "##
261 261 )]
262 262 pub struct IncludeMatcher<'a> {
263 263 patterns: Vec<u8>,
264 264 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
265 265 /// Whether all the patterns match a prefix (i.e. recursively)
266 266 prefix: bool,
267 267 roots: HashSet<HgPathBuf>,
268 268 dirs: HashSet<HgPathBuf>,
269 269 parents: HashSet<HgPathBuf>,
270 270 }
271 271
272 272 impl<'a> Matcher for IncludeMatcher<'a> {
273 273 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
274 274 None
275 275 }
276 276
277 277 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
278 278 false
279 279 }
280 280
281 281 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
282 282 (self.match_fn)(filename.as_ref())
283 283 }
284 284
285 285 fn visit_children_set(
286 286 &self,
287 287 directory: impl AsRef<HgPath>,
288 288 ) -> VisitChildrenSet {
289 289 let dir = directory.as_ref();
290 290 if self.prefix && self.roots.contains(dir) {
291 291 return VisitChildrenSet::Recursive;
292 292 }
293 293 if self.roots.contains(HgPath::new(b""))
294 294 || self.roots.contains(dir)
295 295 || self.dirs.contains(dir)
296 296 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
297 297 {
298 298 return VisitChildrenSet::This;
299 299 }
300 300
301 301 if self.parents.contains(directory.as_ref()) {
302 302 let multiset = self.get_all_parents_children();
303 303 if let Some(children) = multiset.get(dir) {
304 304 return VisitChildrenSet::Set(children.to_owned());
305 305 }
306 306 }
307 307 VisitChildrenSet::Empty
308 308 }
309 309
310 310 fn matches_everything(&self) -> bool {
311 311 false
312 312 }
313 313
314 314 fn is_exact(&self) -> bool {
315 315 false
316 316 }
317 317 }
318 318
#[cfg(feature = "with-re2")]
/// Compiles `pattern` with `Re2` and returns a predicate that tests an
/// `HgPath` against it.
///
/// Compilation fails with `PatternError::UnsupportedSyntax` when the pattern
/// is invalid or uses something `Re2` does not support, for instance
/// back-references.
fn re_matcher(
    pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
    let compiled =
        Re2::new(pattern).map_err(PatternError::UnsupportedSyntax)?;
    Ok(move |path: &HgPath| compiled.is_match(path.as_bytes()))
}
332 332
333 333 #[cfg(not(feature = "with-re2"))]
334 fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> {
335 Err(PatternError::Re2NotInstalled)
334 /// Returns a function that matches an `HgPath` against the given regex
335 /// pattern.
336 ///
337 /// This can fail when the pattern is invalid or not supported by the
338 /// underlying engine (the `regex` crate), for instance anything with
339 /// back-references.
340 fn re_matcher(
341 pattern: &[u8],
342 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
343 use std::io::Write;
344
345 let mut escaped_bytes = vec![];
346 for byte in pattern {
347 if *byte > 127 {
348 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
349 } else {
350 escaped_bytes.push(*byte);
351 }
352 }
353
354 // Avoid the cost of UTF8 checking
355 //
356 // # Safety
357 // This is safe because we escaped all non-ASCII bytes.
358 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
359 let re = regex::bytes::RegexBuilder::new(&pattern_string)
360 .unicode(false)
361 .build()
362 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
363
364 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
336 365 }
337 366
338 367 /// Returns the regex pattern and a function that matches an `HgPath` against
339 368 /// said regex formed by the given ignore patterns.
340 369 fn build_regex_match<'a>(
341 370 ignore_patterns: &'a [&'a IgnorePattern],
342 371 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
343 372 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
344 373 .into_iter()
345 374 .map(|k| build_single_regex(*k))
346 375 .collect();
347 376 let regexps = regexps?;
348 377 let full_regex = regexps.join(&b'|');
349 378
350 379 let matcher = re_matcher(&full_regex)?;
351 380 let func = Box::new(move |filename: &HgPath| matcher(filename));
352 381
353 382 Ok((full_regex, func))
354 383 }
355 384
356 385 /// Returns roots and directories corresponding to each pattern.
357 386 ///
358 387 /// This calculates the roots and directories exactly matching the patterns and
359 388 /// returns a tuple of (roots, dirs). It does not return other directories
360 389 /// which may also need to be considered, like the parent directories.
361 390 fn roots_and_dirs(
362 391 ignore_patterns: &[IgnorePattern],
363 392 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
364 393 let mut roots = Vec::new();
365 394 let mut dirs = Vec::new();
366 395
367 396 for ignore_pattern in ignore_patterns {
368 397 let IgnorePattern {
369 398 syntax, pattern, ..
370 399 } = ignore_pattern;
371 400 match syntax {
372 401 PatternSyntax::RootGlob | PatternSyntax::Glob => {
373 402 let mut root = vec![];
374 403
375 404 for p in pattern.split(|c| *c == b'/') {
376 405 if p.iter().any(|c| match *c {
377 406 b'[' | b'{' | b'*' | b'?' => true,
378 407 _ => false,
379 408 }) {
380 409 break;
381 410 }
382 411 root.push(HgPathBuf::from_bytes(p));
383 412 }
384 413 let buf =
385 414 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
386 415 roots.push(buf);
387 416 }
388 417 PatternSyntax::Path | PatternSyntax::RelPath => {
389 418 let pat = HgPath::new(if pattern == b"." {
390 419 &[] as &[u8]
391 420 } else {
392 421 pattern
393 422 });
394 423 roots.push(pat.to_owned());
395 424 }
396 425 PatternSyntax::RootFiles => {
397 426 let pat = if pattern == b"." {
398 427 &[] as &[u8]
399 428 } else {
400 429 pattern
401 430 };
402 431 dirs.push(HgPathBuf::from_bytes(pat));
403 432 }
404 433 _ => {
405 434 roots.push(HgPathBuf::new());
406 435 }
407 436 }
408 437 }
409 438 (roots, dirs)
410 439 }
411 440
412 441 /// Paths extracted from patterns
413 442 #[derive(Debug, PartialEq)]
414 443 struct RootsDirsAndParents {
415 444 /// Directories to match recursively
416 445 pub roots: HashSet<HgPathBuf>,
417 446 /// Directories to match non-recursively
418 447 pub dirs: HashSet<HgPathBuf>,
419 448 /// Implicitly required directories to go to items in either roots or dirs
420 449 pub parents: HashSet<HgPathBuf>,
421 450 }
422 451
423 452 /// Extract roots, dirs and parents from patterns.
424 453 fn roots_dirs_and_parents(
425 454 ignore_patterns: &[IgnorePattern],
426 455 ) -> PatternResult<RootsDirsAndParents> {
427 456 let (roots, dirs) = roots_and_dirs(ignore_patterns);
428 457
429 458 let mut parents = HashSet::new();
430 459
431 460 parents.extend(
432 461 DirsMultiset::from_manifest(&dirs)
433 462 .map_err(|e| match e {
434 463 DirstateMapError::InvalidPath(e) => e,
435 464 _ => unreachable!(),
436 465 })?
437 466 .iter()
438 467 .map(|k| k.to_owned()),
439 468 );
440 469 parents.extend(
441 470 DirsMultiset::from_manifest(&roots)
442 471 .map_err(|e| match e {
443 472 DirstateMapError::InvalidPath(e) => e,
444 473 _ => unreachable!(),
445 474 })?
446 475 .iter()
447 476 .map(|k| k.to_owned()),
448 477 );
449 478
450 479 Ok(RootsDirsAndParents {
451 480 roots: HashSet::from_iter(roots),
452 481 dirs: HashSet::from_iter(dirs),
453 482 parents,
454 483 })
455 484 }
456 485
457 486 /// Returns a function that checks whether a given file (in the general sense)
458 487 /// should be matched.
459 488 fn build_match<'a, 'b>(
460 489 ignore_patterns: &'a [IgnorePattern],
461 490 root_dir: impl AsRef<Path>,
462 491 ) -> PatternResult<(
463 492 Vec<u8>,
464 493 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
465 494 Vec<PatternFileWarning>,
466 495 )> {
467 496 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
468 497 // For debugging and printing
469 498 let mut patterns = vec![];
470 499 let mut all_warnings = vec![];
471 500
472 501 let (subincludes, ignore_patterns) =
473 502 filter_subincludes(ignore_patterns, root_dir)?;
474 503
475 504 if !subincludes.is_empty() {
476 505 // Build prefix-based matcher functions for subincludes
477 506 let mut submatchers = FastHashMap::default();
478 507 let mut prefixes = vec![];
479 508
480 509 for SubInclude { prefix, root, path } in subincludes.into_iter() {
481 510 let (match_fn, warnings) = get_ignore_function(&[path], root)?;
482 511 all_warnings.extend(warnings);
483 512 prefixes.push(prefix.to_owned());
484 513 submatchers.insert(prefix.to_owned(), match_fn);
485 514 }
486 515
487 516 let match_subinclude = move |filename: &HgPath| {
488 517 for prefix in prefixes.iter() {
489 518 if let Some(rel) = filename.relative_to(prefix) {
490 519 if (submatchers.get(prefix).unwrap())(rel) {
491 520 return true;
492 521 }
493 522 }
494 523 }
495 524 false
496 525 };
497 526
498 527 match_funcs.push(Box::new(match_subinclude));
499 528 }
500 529
501 530 if !ignore_patterns.is_empty() {
502 531 // Either do dumb matching if all patterns are rootfiles, or match
503 532 // with a regex.
504 533 if ignore_patterns
505 534 .iter()
506 535 .all(|k| k.syntax == PatternSyntax::RootFiles)
507 536 {
508 537 let dirs: HashSet<_> = ignore_patterns
509 538 .iter()
510 539 .map(|k| k.pattern.to_owned())
511 540 .collect();
512 541 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
513 542
514 543 let match_func = move |path: &HgPath| -> bool {
515 544 let path = path.as_bytes();
516 545 let i = path.iter().rfind(|a| **a == b'/');
517 546 let dir = if let Some(i) = i {
518 547 &path[..*i as usize]
519 548 } else {
520 549 b"."
521 550 };
522 551 dirs.contains(dir.deref())
523 552 };
524 553 match_funcs.push(Box::new(match_func));
525 554
526 555 patterns.extend(b"rootfilesin: ");
527 556 dirs_vec.sort();
528 557 patterns.extend(dirs_vec.escaped_bytes());
529 558 } else {
530 559 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
531 560 patterns = new_re;
532 561 match_funcs.push(match_func)
533 562 }
534 563 }
535 564
536 565 Ok(if match_funcs.len() == 1 {
537 566 (patterns, match_funcs.remove(0), all_warnings)
538 567 } else {
539 568 (
540 569 patterns,
541 570 Box::new(move |f: &HgPath| -> bool {
542 571 match_funcs.iter().any(|match_func| match_func(f))
543 572 }),
544 573 all_warnings,
545 574 )
546 575 })
547 576 }
548 577
549 578 /// Parses all "ignore" files with their recursive includes and returns a
550 579 /// function that checks whether a given file (in the general sense) should be
551 580 /// ignored.
552 581 #[timed]
553 582 pub fn get_ignore_function<'a>(
554 583 all_pattern_files: &[impl AsRef<Path>],
555 584 root_dir: impl AsRef<Path>,
556 585 ) -> PatternResult<(
557 586 impl for<'r> Fn(&'r HgPath) -> bool + Sync,
558 587 Vec<PatternFileWarning>,
559 588 )> {
560 589 let mut all_patterns = vec![];
561 590 let mut all_warnings = vec![];
562 591
563 592 for pattern_file in all_pattern_files.into_iter() {
564 593 let (patterns, warnings) =
565 594 get_patterns_from_file(pattern_file, &root_dir)?;
566 595
567 596 all_patterns.extend(patterns);
568 597 all_warnings.extend(warnings);
569 598 }
570 599 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
571 600 all_warnings.extend(warnings);
572 601 Ok((move |path: &HgPath| matcher.matches(path), all_warnings))
573 602 }
574 603
575 604 impl<'a> IncludeMatcher<'a> {
576 605 pub fn new(
577 606 ignore_patterns: Vec<IgnorePattern>,
578 607 root_dir: impl AsRef<Path>,
579 608 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
580 609 let (patterns, match_fn, warnings) =
581 610 build_match(&ignore_patterns, root_dir)?;
582 611 let RootsDirsAndParents {
583 612 roots,
584 613 dirs,
585 614 parents,
586 615 } = roots_dirs_and_parents(&ignore_patterns)?;
587 616
588 617 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
589 618 PatternSyntax::Path | PatternSyntax::RelPath => true,
590 619 _ => false,
591 620 });
592 621
593 622 Ok((
594 623 Self {
595 624 patterns,
596 625 match_fn,
597 626 prefix,
598 627 roots,
599 628 dirs,
600 629 parents,
601 630 },
602 631 warnings,
603 632 ))
604 633 }
605 634
606 635 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
607 636 // TODO cache
608 637 let thing = self
609 638 .dirs
610 639 .iter()
611 640 .chain(self.roots.iter())
612 641 .chain(self.parents.iter());
613 642 DirsChildrenMultiset::new(thing, Some(&self.parents))
614 643 }
615 644 }
616 645
617 646 impl<'a> Display for IncludeMatcher<'a> {
618 647 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
619 648 write!(
620 649 f,
621 650 "IncludeMatcher(includes='{}')",
622 651 String::from_utf8_lossy(&self.patterns.escaped_bytes())
623 652 )
624 653 }
625 654 }
626 655
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Collects the given byte strings into a set of `HgPath` references,
    /// ready to be wrapped in `VisitChildrenSet::Set`.
    fn path_set<'a>(names: &[&'a [u8]]) -> HashSet<&'a HgPath> {
        names.iter().map(|name| HgPath::new(*name)).collect()
    }

    /// The glob patterns shared by the roots/dirs/parents tests.
    fn glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    /// Builds an `IncludeMatcher` out of a single pattern.
    fn include_matcher(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        let (matcher, _) = IncludeMatcher::new(
            vec![IgnorePattern::new(syntax, pattern, Path::new(""))],
            "",
        )
        .unwrap();
        matcher
    }

    #[test]
    fn test_roots_and_dirs() {
        let pats = glob_patterns();
        let (roots, dirs) = roots_and_dirs(&pats);

        assert_eq!(
            roots,
            vec!(
                HgPathBuf::from_bytes(b"g/h"),
                HgPathBuf::from_bytes(b"g/h"),
                HgPathBuf::new()
            ),
        );
        assert_eq!(dirs, vec!());
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let pats = glob_patterns();

        let mut roots = HashSet::new();
        roots.insert(HgPathBuf::from_bytes(b"g/h"));
        roots.insert(HgPathBuf::new());

        let dirs = HashSet::new();

        let mut parents = HashSet::new();
        parents.insert(HgPathBuf::new());
        parents.insert(HgPathBuf::from_bytes(b"g"));

        assert_eq!(
            roots_dirs_and_parents(&pats).unwrap(),
            RootsDirsAndParents {
                roots,
                dirs,
                parents
            }
        );
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Set(path_set(&[b"foo.txt"]))
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPath::new(b"rootfile.txt"),
            HgPath::new(b"a/file1.txt"),
            HgPath::new(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPath::new(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"a", b"rootfile.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            VisitChildrenSet::Set(path_set(&[b"b", b"file1.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            VisitChildrenSet::Set(path_set(&[b"c", b"file2.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            VisitChildrenSet::Set(path_set(&[b"d"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            VisitChildrenSet::Set(path_set(&[b"file4.txt"]))
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    // Not gated on the `with-re2` feature: `IncludeMatcher` can be built
    // with either regex engine.
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let matcher = include_matcher(PatternSyntax::RelPath, b"dir/subdir");

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let matcher =
            include_matcher(PatternSyntax::RootFiles, b"dir/subdir");

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let matcher = include_matcher(PatternSyntax::Glob, b"dir/z*");

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now