##// END OF EJS Templates
matcher: fix the issue with regex inline-flag in rust oo...
marmoute -
r50499:086b0c4f stable
parent child Browse files
Show More
@@ -1,706 +1,756
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::ops::Deref;
21 21 use std::path::{Path, PathBuf};
22 22 use std::vec::Vec;
23 23
24 24 lazy_static! {
25 25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 27 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
28 28 for byte in to_escape {
29 29 v[*byte as usize].insert(0, b'\\');
30 30 }
31 31 v
32 32 };
33 33 }
34 34
/// Replacements for what follows a `*` in a glob, as `(lookahead, regex)`
/// pairs.
///
/// These are tried in order and the first matching lookahead wins; the
/// final, empty lookahead always matches.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    // `**/`: any number of leading directories, possibly none
    (b"*/", b"(?:.*/)?"),
    // `**`: anything, slashes included
    (b"*", b".*"),
    // lone `*`: anything but a slash
    (b"", b"[^/]*"),
];

/// Appended to the regexp of globs
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
41 41
42 42 #[derive(Debug, Clone, PartialEq, Eq)]
43 43 pub enum PatternSyntax {
44 44 /// A regular expression
45 45 Regexp,
46 46 /// Glob that matches at the front of the path
47 47 RootGlob,
48 48 /// Glob that matches at any suffix of the path (still anchored at
49 49 /// slashes)
50 50 Glob,
51 51 /// a path relative to repository root, which is matched recursively
52 52 Path,
53 53 /// A path relative to cwd
54 54 RelPath,
55 55 /// an unrooted glob (*.rs matches Rust files in all dirs)
56 56 RelGlob,
57 57 /// A regexp that needn't match the start of a name
58 58 RelRegexp,
59 59 /// A path relative to repository root, which is matched non-recursively
60 60 /// (will not match subdirectories)
61 61 RootFiles,
62 62 /// A file of patterns to read and include
63 63 Include,
64 64 /// A file of patterns to match against files under the same directory
65 65 SubInclude,
66 66 /// SubInclude with the result of parsing the included file
67 67 ///
68 68 /// Note: there is no ExpandedInclude because that expansion can be done
69 69 /// in place by replacing the Include pattern by the included patterns.
70 70 /// SubInclude requires more handling.
71 71 ///
72 72 /// Note: `Box` is used to minimize size impact on other enum variants
73 73 ExpandedSubInclude(Box<SubInclude>),
74 74 }
75 75
76 76 /// Transforms a glob pattern into a regex
77 77 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
78 78 let mut input = pat;
79 79 let mut res: Vec<u8> = vec![];
80 80 let mut group_depth = 0;
81 81
82 82 while let Some((c, rest)) = input.split_first() {
83 83 input = rest;
84 84
85 85 match c {
86 86 b'*' => {
87 87 for (source, repl) in GLOB_REPLACEMENTS {
88 88 if let Some(rest) = input.drop_prefix(source) {
89 89 input = rest;
90 90 res.extend(*repl);
91 91 break;
92 92 }
93 93 }
94 94 }
95 95 b'?' => res.extend(b"."),
96 96 b'[' => {
97 97 match input.iter().skip(1).position(|b| *b == b']') {
98 98 None => res.extend(b"\\["),
99 99 Some(end) => {
100 100 // Account for the one we skipped
101 101 let end = end + 1;
102 102
103 103 res.extend(b"[");
104 104
105 105 for (i, b) in input[..end].iter().enumerate() {
106 106 if *b == b'!' && i == 0 {
107 107 res.extend(b"^")
108 108 } else if *b == b'^' && i == 0 {
109 109 res.extend(b"\\^")
110 110 } else if *b == b'\\' {
111 111 res.extend(b"\\\\")
112 112 } else {
113 113 res.push(*b)
114 114 }
115 115 }
116 116 res.extend(b"]");
117 117 input = &input[end + 1..];
118 118 }
119 119 }
120 120 }
121 121 b'{' => {
122 122 group_depth += 1;
123 123 res.extend(b"(?:")
124 124 }
125 125 b'}' if group_depth > 0 => {
126 126 group_depth -= 1;
127 127 res.extend(b")");
128 128 }
129 129 b',' if group_depth > 0 => res.extend(b"|"),
130 130 b'\\' => {
131 131 let c = {
132 132 if let Some((c, rest)) = input.split_first() {
133 133 input = rest;
134 134 c
135 135 } else {
136 136 c
137 137 }
138 138 };
139 139 res.extend(&RE_ESCAPE[*c as usize])
140 140 }
141 141 _ => res.extend(&RE_ESCAPE[*c as usize]),
142 142 }
143 143 }
144 144 res
145 145 }
146 146
147 147 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
148 148 pattern
149 149 .iter()
150 150 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
151 151 .collect()
152 152 }
153 153
154 154 pub fn parse_pattern_syntax(
155 155 kind: &[u8],
156 156 ) -> Result<PatternSyntax, PatternError> {
157 157 match kind {
158 158 b"re:" => Ok(PatternSyntax::Regexp),
159 159 b"path:" => Ok(PatternSyntax::Path),
160 160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 164 b"glob:" => Ok(PatternSyntax::Glob),
165 165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 166 b"include:" => Ok(PatternSyntax::Include),
167 167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 168 _ => Err(PatternError::UnsupportedSyntax(
169 169 String::from_utf8_lossy(kind).to_string(),
170 170 )),
171 171 }
172 172 }
173 173
174 lazy_static! {
175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
176 }
177
174 178 /// Builds the regex that corresponds to the given pattern.
175 179 /// If within a `syntax: regexp` context, returns the pattern,
176 180 /// otherwise, returns the corresponding regex.
177 181 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
178 182 let IgnorePattern {
179 183 syntax, pattern, ..
180 184 } = entry;
181 185 if pattern.is_empty() {
182 186 return vec![];
183 187 }
184 188 match syntax {
185 189 PatternSyntax::Regexp => pattern.to_owned(),
186 190 PatternSyntax::RelRegexp => {
187 191 // The `regex` crate accepts `**` while `re2` and Python's `re`
188 192 // do not. Checking for `*` correctly triggers the same error all
189 193 // engines.
190 194 if pattern[0] == b'^'
191 195 || pattern[0] == b'*'
192 196 || pattern.starts_with(b".*")
193 197 {
194 198 return pattern.to_owned();
195 199 }
196 [&b".*"[..], pattern].concat()
200 match FLAG_RE.find(pattern) {
201 Some(mat) => {
202 let s = mat.start();
203 let e = mat.end();
204 [
205 &b"(?"[..],
206 &pattern[s + 2..e - 1],
207 &b":"[..],
208 &b".*"[..],
209 &pattern[e..],
210 &b")"[..],
211 ]
212 .concat()
213 }
214 None => [&b".*"[..], pattern].concat(),
215 }
197 216 }
198 217 PatternSyntax::Path | PatternSyntax::RelPath => {
199 218 if pattern == b"." {
200 219 return vec![];
201 220 }
202 221 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
203 222 }
204 223 PatternSyntax::RootFiles => {
205 224 let mut res = if pattern == b"." {
206 225 vec![]
207 226 } else {
208 227 // Pattern is a directory name.
209 228 [escape_pattern(pattern).as_slice(), b"/"].concat()
210 229 };
211 230
212 231 // Anything after the pattern must be a non-directory.
213 232 res.extend(b"[^/]+$");
214 233 res
215 234 }
216 235 PatternSyntax::RelGlob => {
217 236 let glob_re = glob_to_re(pattern);
218 237 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
219 238 [b".*", rest, GLOB_SUFFIX].concat()
220 239 } else {
221 240 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
222 241 }
223 242 }
224 243 PatternSyntax::Glob | PatternSyntax::RootGlob => {
225 244 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
226 245 }
227 246 PatternSyntax::Include
228 247 | PatternSyntax::SubInclude
229 248 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
230 249 }
231 250 }
232 251
/// Bytes whose presence means a glob cannot be treated as a plain path.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
235 254
/// Normalizes a `/`-separated path given as bytes: empty and `.` components
/// are removed and `..` components are collapsed against the component
/// before them where possible. An empty result becomes `.`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';
    const CURRENT_DIR: &[u8] = b".";
    const PARENT_DIR: &[u8] = b"..";

    if bytes.is_empty() {
        return CURRENT_DIR.to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading_slashes =
        match bytes.iter().take_while(|b| **b == SEP).count() {
            count if count > 2 => 1,
            count => count,
        };

    let mut kept: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == SEP) {
        if component.is_empty() || component == CURRENT_DIR {
            continue;
        }
        // A `..` is kept only when there is nothing it could cancel out:
        // at the start of a relative path, or right after another `..`.
        let keep = component != PARENT_DIR
            || (leading_slashes == 0 && kept.is_empty())
            || kept.last().map_or(false, |last| *last == PARENT_DIR);
        if keep {
            kept.push(component);
        } else {
            // No-op when nothing has been kept yet (e.g. `/..`)
            kept.pop();
        }
    }

    let mut normalized = vec![SEP; leading_slashes];
    for (index, component) in kept.iter().enumerate() {
        if index > 0 {
            normalized.push(SEP);
        }
        normalized.extend_from_slice(component);
    }

    if normalized.is_empty() {
        CURRENT_DIR.to_vec()
    } else {
        normalized
    }
}
277 296
278 297 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
279 298 /// that don't need to be transformed into a regex.
280 299 pub fn build_single_regex(
281 300 entry: &IgnorePattern,
282 301 ) -> Result<Option<Vec<u8>>, PatternError> {
283 302 let IgnorePattern {
284 303 pattern, syntax, ..
285 304 } = entry;
286 305 let pattern = match syntax {
287 306 PatternSyntax::RootGlob
288 307 | PatternSyntax::Path
289 308 | PatternSyntax::RelGlob
290 309 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
291 310 PatternSyntax::Include | PatternSyntax::SubInclude => {
292 311 return Err(PatternError::NonRegexPattern(entry.clone()))
293 312 }
294 313 _ => pattern.to_owned(),
295 314 };
296 315 if *syntax == PatternSyntax::RootGlob
297 316 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
298 317 {
299 318 Ok(None)
300 319 } else {
301 320 let mut entry = entry.clone();
302 321 entry.pattern = pattern;
303 322 Ok(Some(_build_single_regex(&entry)))
304 323 }
305 324 }
306 325
307 326 lazy_static! {
308 327 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
309 328 let mut m = FastHashMap::default();
310 329
311 330 m.insert(b"re".as_ref(), b"relre:".as_ref());
312 331 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
313 332 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
314 333 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
315 334 m.insert(b"include".as_ref(), b"include:".as_ref());
316 335 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
317 336 m.insert(b"path".as_ref(), b"path:".as_ref());
318 337 m.insert(b"rootfilesin".as_ref(), b"rootfilesin:".as_ref());
319 338 m
320 339 };
321 340 }
322 341
/// Non-fatal problems reported while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// A `syntax:` line named an unknown syntax: (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file does not exist: file path
    NoSuchFile(PathBuf),
}
330 349
331 350 pub fn parse_pattern_file_contents(
332 351 lines: &[u8],
333 352 file_path: &Path,
334 353 default_syntax_override: Option<&[u8]>,
335 354 warn: bool,
336 355 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
337 356 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
338 357
339 358 #[allow(clippy::trivial_regex)]
340 359 let comment_escape_regex = Regex::new(r"\\#").unwrap();
341 360 let mut inputs: Vec<IgnorePattern> = vec![];
342 361 let mut warnings: Vec<PatternFileWarning> = vec![];
343 362
344 363 let mut current_syntax =
345 364 default_syntax_override.unwrap_or(b"relre:".as_ref());
346 365
347 366 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
348 367 let line_number = line_number + 1;
349 368
350 369 let line_buf;
351 370 if line.contains(&b'#') {
352 371 if let Some(cap) = comment_regex.captures(line) {
353 372 line = &line[..cap.get(1).unwrap().end()]
354 373 }
355 374 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
356 375 line = &line_buf;
357 376 }
358 377
359 378 let mut line = line.trim_end();
360 379
361 380 if line.is_empty() {
362 381 continue;
363 382 }
364 383
365 384 if let Some(syntax) = line.drop_prefix(b"syntax:") {
366 385 let syntax = syntax.trim();
367 386
368 387 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
369 388 current_syntax = rel_syntax;
370 389 } else if warn {
371 390 warnings.push(PatternFileWarning::InvalidSyntax(
372 391 file_path.to_owned(),
373 392 syntax.to_owned(),
374 393 ));
375 394 }
376 395 continue;
377 396 }
378 397
379 398 let mut line_syntax: &[u8] = &current_syntax;
380 399
381 400 for (s, rels) in SYNTAXES.iter() {
382 401 if let Some(rest) = line.drop_prefix(rels) {
383 402 line_syntax = rels;
384 403 line = rest;
385 404 break;
386 405 }
387 406 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
388 407 line_syntax = rels;
389 408 line = rest;
390 409 break;
391 410 }
392 411 }
393 412
394 413 inputs.push(IgnorePattern::new(
395 414 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
396 415 PatternError::UnsupportedSyntax(syntax) => {
397 416 PatternError::UnsupportedSyntaxInFile(
398 417 syntax,
399 418 file_path.to_string_lossy().into(),
400 419 line_number,
401 420 )
402 421 }
403 422 _ => e,
404 423 })?,
405 424 &line,
406 425 file_path,
407 426 ));
408 427 }
409 428 Ok((inputs, warnings))
410 429 }
411 430
412 431 pub fn read_pattern_file(
413 432 file_path: &Path,
414 433 warn: bool,
415 434 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
416 435 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
417 436 match std::fs::read(file_path) {
418 437 Ok(contents) => {
419 438 inspect_pattern_bytes(file_path, &contents);
420 439 parse_pattern_file_contents(&contents, file_path, None, warn)
421 440 }
422 441 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
423 442 vec![],
424 443 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
425 444 )),
426 445 Err(e) => Err(e.into()),
427 446 }
428 447 }
429 448
430 449 /// Represents an entry in an "ignore" file.
431 450 #[derive(Debug, Eq, PartialEq, Clone)]
432 451 pub struct IgnorePattern {
433 452 pub syntax: PatternSyntax,
434 453 pub pattern: Vec<u8>,
435 454 pub source: PathBuf,
436 455 }
437 456
438 457 impl IgnorePattern {
439 458 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
440 459 Self {
441 460 syntax,
442 461 pattern: pattern.to_owned(),
443 462 source: source.to_owned(),
444 463 }
445 464 }
446 465 }
447 466
448 467 pub type PatternResult<T> = Result<T, PatternError>;
449 468
450 469 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
451 470 /// and `subinclude:` patterns.
452 471 ///
453 472 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
454 473 /// is used for the latter to form a tree of patterns.
455 474 pub fn get_patterns_from_file(
456 475 pattern_file: &Path,
457 476 root_dir: &Path,
458 477 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
459 478 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
460 479 let (patterns, mut warnings) =
461 480 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
462 481 let patterns = patterns
463 482 .into_iter()
464 483 .flat_map(|entry| -> PatternResult<_> {
465 484 Ok(match &entry.syntax {
466 485 PatternSyntax::Include => {
467 486 let inner_include =
468 487 root_dir.join(get_path_from_bytes(&entry.pattern));
469 488 let (inner_pats, inner_warnings) = get_patterns_from_file(
470 489 &inner_include,
471 490 root_dir,
472 491 inspect_pattern_bytes,
473 492 )?;
474 493 warnings.extend(inner_warnings);
475 494 inner_pats
476 495 }
477 496 PatternSyntax::SubInclude => {
478 497 let mut sub_include = SubInclude::new(
479 498 &root_dir,
480 499 &entry.pattern,
481 500 &entry.source,
482 501 )?;
483 502 let (inner_patterns, inner_warnings) =
484 503 get_patterns_from_file(
485 504 &sub_include.path,
486 505 &sub_include.root,
487 506 inspect_pattern_bytes,
488 507 )?;
489 508 sub_include.included_patterns = inner_patterns;
490 509 warnings.extend(inner_warnings);
491 510 vec![IgnorePattern {
492 511 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
493 512 sub_include,
494 513 )),
495 514 ..entry
496 515 }]
497 516 }
498 517 _ => vec![entry],
499 518 })
500 519 })
501 520 .flatten()
502 521 .collect();
503 522
504 523 Ok((patterns, warnings))
505 524 }
506 525
507 526 /// Holds all the information needed to handle a `subinclude:` pattern.
508 527 #[derive(Debug, PartialEq, Eq, Clone)]
509 528 pub struct SubInclude {
510 529 /// Will be used for repository (hg) paths that start with this prefix.
511 530 /// It is relative to the current working directory, so comparing against
512 531 /// repository paths is painless.
513 532 pub prefix: HgPathBuf,
514 533 /// The file itself, containing the patterns
515 534 pub path: PathBuf,
516 535 /// Folder in the filesystem where this it applies
517 536 pub root: PathBuf,
518 537
519 538 pub included_patterns: Vec<IgnorePattern>,
520 539 }
521 540
522 541 impl SubInclude {
523 542 pub fn new(
524 543 root_dir: &Path,
525 544 pattern: &[u8],
526 545 source: &Path,
527 546 ) -> Result<SubInclude, HgPathError> {
528 547 let normalized_source =
529 548 normalize_path_bytes(&get_bytes_from_path(source));
530 549
531 550 let source_root = get_path_from_bytes(&normalized_source);
532 551 let source_root =
533 552 source_root.parent().unwrap_or_else(|| source_root.deref());
534 553
535 554 let path = source_root.join(get_path_from_bytes(pattern));
536 555 let new_root = path.parent().unwrap_or_else(|| path.deref());
537 556
538 557 let prefix = canonical_path(root_dir, root_dir, new_root)?;
539 558
540 559 Ok(Self {
541 560 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
542 561 if !p.is_empty() {
543 562 p.push_byte(b'/');
544 563 }
545 564 Ok(p)
546 565 })?,
547 566 path: path.to_owned(),
548 567 root: new_root.to_owned(),
549 568 included_patterns: Vec::new(),
550 569 })
551 570 }
552 571 }
553 572
554 573 /// Separate and pre-process subincludes from other patterns for the "ignore"
555 574 /// phase.
556 575 pub fn filter_subincludes(
557 576 ignore_patterns: Vec<IgnorePattern>,
558 577 ) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
559 578 let mut subincludes = vec![];
560 579 let mut others = vec![];
561 580
562 581 for pattern in ignore_patterns {
563 582 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
564 583 {
565 584 subincludes.push(sub_include);
566 585 } else {
567 586 others.push(pattern)
568 587 }
569 588 }
570 589 Ok((subincludes, others))
571 590 }
572 591
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds the regex of a lone pattern that has an empty source path.
    fn regex_for(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
        let entry = IgnorePattern::new(syntax, pattern, Path::new(""));
        build_single_regex(&entry).unwrap()
    }

    /// Parses `lines` as the contents of `file_path`, warnings disabled.
    fn parsed(lines: &[u8]) -> Vec<IgnorePattern> {
        parse_pattern_file_contents(
            lines,
            Path::new("file_path"),
            None,
            false,
        )
        .unwrap()
        .0
    }

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    #[test]
    fn glob_test() {
        // (glob, expected regex)
        let cases: [(&[u8], &[u8]); 8] = [
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];
        for (glob, expected) in cases.iter() {
            assert_eq!(glob_to_re(glob), expected.to_vec());
        }
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        assert_eq!(
            parsed(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        assert_eq!(parsed(b"syntax: include\nsyntax: glob"), vec![]);

        assert_eq!(
            parsed(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            regex_for(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(regex_for(PatternSyntax::RootGlob, b""), None);
        assert_eq!(regex_for(PatternSyntax::RootGlob, b"whatever"), None);
        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_relregex() {
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"^ba{2}r"),
            Some(b"^ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"ba{2}r"),
            Some(b".*ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)ba{2}r"),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
    }
}
@@ -1,515 +1,514
1 1 #testcases dirstate-v1 dirstate-v2
2 2
3 3 #if dirstate-v2
4 4 $ cat >> $HGRCPATH << EOF
5 5 > [format]
6 6 > use-dirstate-v2=1
7 7 > [storage]
8 8 > dirstate-v2.slow-path=allow
9 9 > EOF
10 10 #endif
11 11
12 12 $ hg init ignorerepo
13 13 $ cd ignorerepo
14 14
15 15 debugignore with no hgignore should be deterministic:
16 16 $ hg debugignore
17 17 <nevermatcher>
18 18
19 19 Issue562: .hgignore requires newline at end:
20 20
21 21 $ touch foo
22 22 $ touch bar
23 23 $ touch baz
24 24 $ cat > makeignore.py <<EOF
25 25 > f = open(".hgignore", "w")
26 26 > f.write("ignore\n")
27 27 > f.write("foo\n")
28 28 > # No EOL here
29 29 > f.write("bar")
30 30 > f.close()
31 31 > EOF
32 32
33 33 $ "$PYTHON" makeignore.py
34 34
35 35 Should display baz only:
36 36
37 37 $ hg status
38 38 ? baz
39 39
40 40 $ rm foo bar baz .hgignore makeignore.py
41 41
42 42 $ touch a.o
43 43 $ touch a.c
44 44 $ touch syntax
45 45 $ mkdir dir
46 46 $ touch dir/a.o
47 47 $ touch dir/b.o
48 48 $ touch dir/c.o
49 49
50 50 $ hg add dir/a.o
51 51 $ hg commit -m 0
52 52 $ hg add dir/b.o
53 53
54 54 $ hg status
55 55 A dir/b.o
56 56 ? a.c
57 57 ? a.o
58 58 ? dir/c.o
59 59 ? syntax
60 60
61 61 $ echo "*.o" > .hgignore
62 62 $ hg status
63 63 abort: $TESTTMP/ignorerepo/.hgignore: invalid pattern (relre): *.o (glob)
64 64 [255]
65 65
66 66 Test relre with flags (issue6759)
67 67 ---------------------------------
68 68
69 69 regexp with flag is the first one
70 70
71 71 $ echo 're:(?i)\.O$' > .hgignore
72 72 $ echo 're:.hgignore' >> .hgignore
73 73 $ hg status
74 74 A dir/b.o
75 75 ? a.c
76 76 ? syntax
77 77
78 78 regex with flag is not the first one
79 79
80 80 $ echo 're:.hgignore' > .hgignore
81 81 $ echo 're:(?i)\.O$' >> .hgignore
82 82 $ hg status
83 83 A dir/b.o
84 84 ? a.c
85 85 ? syntax
86 86
87 87 flag in a pattern should affect that pattern only
88 88
89 89 $ echo 're:(?i)\.O$' > .hgignore
90 90 $ echo 're:.HGIGNORE' >> .hgignore
91 91 $ hg status
92 92 A dir/b.o
93 ? .hgignore (no-rust !)
94 ? .hgignore (rust missing-correct-output !)
93 ? .hgignore
95 94 ? a.c
96 95 ? syntax
97 96
98 97 $ echo 're:.HGIGNORE' > .hgignore
99 98 $ echo 're:(?i)\.O$' >> .hgignore
100 99 $ hg status
101 100 A dir/b.o
102 101 ? .hgignore
103 102 ? a.c
104 103 ? syntax
105 104
106 105
107 106 further testing
108 107 ---------------
109 108
110 109 $ echo 're:^(?!a).*\.o$' > .hgignore
111 110 $ hg status
112 111 A dir/b.o
113 112 ? .hgignore
114 113 ? a.c
115 114 ? a.o
116 115 ? syntax
117 116 #if rhg
118 117 $ hg status --config rhg.on-unsupported=abort
119 118 unsupported feature: Unsupported syntax regex parse error:
120 119 ^(?:^(?!a).*\.o$)
121 120 ^^^
122 121 error: look-around, including look-ahead and look-behind, is not supported
123 122 [252]
124 123 #endif
125 124
126 125 Ensure given files are relative to cwd
127 126
128 127 $ echo "dir/.*\.o" > .hgignore
129 128 $ hg status -i
130 129 I dir/c.o
131 130
132 131 $ hg debugignore dir/c.o dir/missing.o
133 132 dir/c.o is ignored
134 133 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
135 134 dir/missing.o is ignored
136 135 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
137 136 $ cd dir
138 137 $ hg debugignore c.o missing.o
139 138 c.o is ignored
140 139 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
141 140 missing.o is ignored
142 141 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
143 142
144 143 For icasefs, inexact matches also work, except for missing files
145 144
146 145 #if icasefs
147 146 $ hg debugignore c.O missing.O
148 147 c.o is ignored
149 148 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
150 149 missing.O is not ignored
151 150 #endif
152 151
153 152 $ cd ..
154 153
155 154 $ echo ".*\.o" > .hgignore
156 155 $ hg status
157 156 A dir/b.o
158 157 ? .hgignore
159 158 ? a.c
160 159 ? syntax
161 160
162 161 Ensure that comments work:
163 162
164 163 $ touch 'foo#bar' 'quux#' 'quu0#'
165 164 #if no-windows
166 165 $ touch 'baz\' 'baz\wat' 'ba0\#wat' 'ba1\\' 'ba1\\wat' 'quu0\'
167 166 #endif
168 167
169 168 $ cat <<'EOF' >> .hgignore
170 169 > # full-line comment
171 170 > # whitespace-only comment line
172 171 > syntax# pattern, no whitespace, then comment
173 172 > a.c # pattern, then whitespace, then comment
174 173 > baz\\# # (escaped) backslash, then comment
175 174 > ba0\\\#w # (escaped) backslash, escaped comment character, then comment
176 175 > ba1\\\\# # (escaped) backslashes, then comment
177 176 > foo\#b # escaped comment character
178 177 > quux\## escaped comment character at end of name
179 178 > EOF
180 179 $ hg status
181 180 A dir/b.o
182 181 ? .hgignore
183 182 ? quu0#
184 183 ? quu0\ (no-windows !)
185 184
186 185 $ cat <<'EOF' > .hgignore
187 186 > .*\.o
188 187 > syntax: glob
189 188 > syntax# pattern, no whitespace, then comment
190 189 > a.c # pattern, then whitespace, then comment
191 190 > baz\\#* # (escaped) backslash, then comment
192 191 > ba0\\\#w* # (escaped) backslash, escaped comment character, then comment
193 192 > ba1\\\\#* # (escaped) backslashes, then comment
194 193 > foo\#b* # escaped comment character
195 194 > quux\## escaped comment character at end of name
196 195 > quu0[\#]# escaped comment character inside [...]
197 196 > EOF
198 197 $ hg status
199 198 A dir/b.o
200 199 ? .hgignore
201 200 ? ba1\\wat (no-windows !)
202 201 ? baz\wat (no-windows !)
203 202 ? quu0\ (no-windows !)
204 203
205 204 $ rm 'foo#bar' 'quux#' 'quu0#'
206 205 #if no-windows
207 206 $ rm 'baz\' 'baz\wat' 'ba0\#wat' 'ba1\\' 'ba1\\wat' 'quu0\'
208 207 #endif
209 208
210 209 Check that '^\.' does not ignore the root directory:
211 210
212 211 $ echo "^\." > .hgignore
213 212 $ hg status
214 213 A dir/b.o
215 214 ? a.c
216 215 ? a.o
217 216 ? dir/c.o
218 217 ? syntax
219 218
220 219 Test that patterns from ui.ignore options are read:
221 220
222 221 $ echo > .hgignore
223 222 $ cat >> $HGRCPATH << EOF
224 223 > [ui]
225 224 > ignore.other = $TESTTMP/ignorerepo/.hg/testhgignore
226 225 > EOF
227 226 $ echo "glob:**.o" > .hg/testhgignore
228 227 $ hg status
229 228 A dir/b.o
230 229 ? .hgignore
231 230 ? a.c
232 231 ? syntax
233 232
234 233 empty out testhgignore
235 234 $ echo > .hg/testhgignore
236 235
237 236 Test relative ignore path (issue4473):
238 237
239 238 $ cat >> $HGRCPATH << EOF
240 239 > [ui]
241 240 > ignore.relative = .hg/testhgignorerel
242 241 > EOF
243 242 $ echo "glob:*.o" > .hg/testhgignorerel
244 243 $ cd dir
245 244 $ hg status
246 245 A dir/b.o
247 246 ? .hgignore
248 247 ? a.c
249 248 ? syntax
250 249 $ hg debugignore
251 250 <includematcher includes='.*\\.o(?:/|$)'>
252 251
253 252 $ cd ..
254 253 $ echo > .hg/testhgignorerel
255 254 $ echo "syntax: glob" > .hgignore
256 255 $ echo "re:.*\.o" >> .hgignore
257 256 $ hg status
258 257 A dir/b.o
259 258 ? .hgignore
260 259 ? a.c
261 260 ? syntax
262 261
263 262 $ echo "syntax: invalid" > .hgignore
264 263 $ hg status
265 264 $TESTTMP/ignorerepo/.hgignore: ignoring invalid syntax 'invalid'
266 265 A dir/b.o
267 266 ? .hgignore
268 267 ? a.c
269 268 ? a.o
270 269 ? dir/c.o
271 270 ? syntax
272 271
273 272 $ echo "syntax: glob" > .hgignore
274 273 $ echo "*.o" >> .hgignore
275 274 $ hg status
276 275 A dir/b.o
277 276 ? .hgignore
278 277 ? a.c
279 278 ? syntax
280 279
281 280 $ echo "relglob:syntax*" > .hgignore
282 281 $ hg status
283 282 A dir/b.o
284 283 ? .hgignore
285 284 ? a.c
286 285 ? a.o
287 286 ? dir/c.o
288 287
289 288 $ echo "relglob:*" > .hgignore
290 289 $ hg status
291 290 A dir/b.o
292 291
293 292 $ cd dir
294 293 $ hg status .
295 294 A b.o
296 295
297 296 $ hg debugignore
298 297 <includematcher includes='.*(?:/|$)'>
299 298
300 299 $ hg debugignore b.o
301 300 b.o is ignored
302 301 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: '*') (glob)
303 302
304 303 $ cd ..
305 304
306 305 Check patterns that match only the directory
307 306
308 307 "(fsmonitor !)" below assumes that fsmonitor is enabled with
309 308 "walk_on_invalidate = false" (default), which doesn't involve
310 309 re-walking whole repository at detection of .hgignore change.
311 310
312 311 $ echo "^dir\$" > .hgignore
313 312 $ hg status
314 313 A dir/b.o
315 314 ? .hgignore
316 315 ? a.c
317 316 ? a.o
318 317 ? dir/c.o (fsmonitor !)
319 318 ? syntax
320 319
321 320 Check recursive glob pattern matches no directories (dir/**/c.o matches dir/c.o)
322 321
323 322 $ echo "syntax: glob" > .hgignore
324 323 $ echo "dir/**/c.o" >> .hgignore
325 324 $ touch dir/c.o
326 325 $ mkdir dir/subdir
327 326 $ touch dir/subdir/c.o
328 327 $ hg status
329 328 A dir/b.o
330 329 ? .hgignore
331 330 ? a.c
332 331 ? a.o
333 332 ? syntax
334 333 $ hg debugignore a.c
335 334 a.c is not ignored
336 335 $ hg debugignore dir/c.o
337 336 dir/c.o is ignored
338 337 (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 2: 'dir/**/c.o') (glob)
339 338
340 339 Check rooted globs
341 340
342 341 $ hg purge --all --config extensions.purge=
343 342 $ echo "syntax: rootglob" > .hgignore
344 343 $ echo "a/*.ext" >> .hgignore
345 344 $ for p in a b/a aa; do mkdir -p $p; touch $p/b.ext; done
346 345 $ hg status -A 'set:**.ext'
347 346 ? aa/b.ext
348 347 ? b/a/b.ext
349 348 I a/b.ext
350 349
351 350 Check using 'include:' in ignore file
352 351
353 352 $ hg purge --all --config extensions.purge=
354 353 $ touch foo.included
355 354
356 355 $ echo ".*.included" > otherignore
357 356 $ hg status -I "include:otherignore"
358 357 ? foo.included
359 358
360 359 $ echo "include:otherignore" >> .hgignore
361 360 $ hg status
362 361 A dir/b.o
363 362 ? .hgignore
364 363 ? otherignore
365 364
366 365 Check recursive uses of 'include:'
367 366
368 367 $ echo "include:nested/ignore" >> otherignore
369 368 $ mkdir nested nested/more
370 369 $ echo "glob:*ignore" > nested/ignore
371 370 $ echo "rootglob:a" >> nested/ignore
372 371 $ touch a nested/a nested/more/a
373 372 $ hg status
374 373 A dir/b.o
375 374 ? nested/a
376 375 ? nested/more/a
377 376 $ rm a nested/a nested/more/a
378 377
379 378 $ cp otherignore goodignore
380 379 $ echo "include:badignore" >> otherignore
381 380 $ hg status
382 381 skipping unreadable pattern file 'badignore': $ENOENT$
383 382 A dir/b.o
384 383
385 384 $ mv goodignore otherignore
386 385
387 386 Check using 'include:' while in a non-root directory
388 387
389 388 $ cd ..
390 389 $ hg -R ignorerepo status
391 390 A dir/b.o
392 391 $ cd ignorerepo
393 392
394 393 Check including subincludes
395 394
396 395 $ hg revert -q --all
397 396 $ hg purge --all --config extensions.purge=
398 397 $ echo ".hgignore" > .hgignore
399 398 $ mkdir dir1 dir2
400 399 $ touch dir1/file1 dir1/file2 dir2/file1 dir2/file2
401 400 $ echo "subinclude:dir2/.hgignore" >> .hgignore
402 401 $ echo "glob:file*2" > dir2/.hgignore
403 402 $ hg status
404 403 ? dir1/file1
405 404 ? dir1/file2
406 405 ? dir2/file1
407 406
408 407 Check including subincludes with other patterns
409 408
410 409 $ echo "subinclude:dir1/.hgignore" >> .hgignore
411 410
412 411 $ mkdir dir1/subdir
413 412 $ touch dir1/subdir/file1
414 413 $ echo "rootglob:f?le1" > dir1/.hgignore
415 414 $ hg status
416 415 ? dir1/file2
417 416 ? dir1/subdir/file1
418 417 ? dir2/file1
419 418 $ rm dir1/subdir/file1
420 419
421 420 $ echo "regexp:f.le1" > dir1/.hgignore
422 421 $ hg status
423 422 ? dir1/file2
424 423 ? dir2/file1
425 424
426 425 Check multiple levels of sub-ignores
427 426
428 427 $ touch dir1/subdir/subfile1 dir1/subdir/subfile3 dir1/subdir/subfile4
429 428 $ echo "subinclude:subdir/.hgignore" >> dir1/.hgignore
430 429 $ echo "glob:subfil*3" >> dir1/subdir/.hgignore
431 430
432 431 $ hg status
433 432 ? dir1/file2
434 433 ? dir1/subdir/subfile4
435 434 ? dir2/file1
436 435
437 436 Check include subignore at the same level
438 437
439 438 $ mv dir1/subdir/.hgignore dir1/.hgignoretwo
440 439 $ echo "regexp:f.le1" > dir1/.hgignore
441 440 $ echo "subinclude:.hgignoretwo" >> dir1/.hgignore
442 441 $ echo "glob:file*2" > dir1/.hgignoretwo
443 442
444 443 $ hg status | grep file2
445 444 [1]
446 445 $ hg debugignore dir1/file2
447 446 dir1/file2 is ignored
448 447 (ignore rule in dir2/.hgignore, line 1: 'file*2')
449 448
450 449 #if windows
451 450
452 451 Windows paths are accepted on input
453 452
454 453 $ rm dir1/.hgignore
455 454 $ echo "dir1/file*" >> .hgignore
456 455 $ hg debugignore "dir1\file2"
457 456 dir1/file2 is ignored
458 457 (ignore rule in $TESTTMP\ignorerepo\.hgignore, line 4: 'dir1/file*')
459 458 $ hg up -qC .
460 459
461 460 #endif
462 461
463 462 #if dirstate-v2 rust
464 463
465 464 Check the hash of ignore patterns written in the dirstate
466 465 This is an optimization that is only relevant when using the Rust extensions
467 466
468 467 $ cat_filename_and_hash () {
469 468 > for i in "$@"; do
470 469 > printf "$i "
471 470 > cat "$i" | "$TESTDIR"/f --raw-sha1 | sed 's/^raw-sha1=//'
472 471 > done
473 472 > }
474 473 $ hg status > /dev/null
475 474 $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
476 475 sha1=c0beb296395d48ced8e14f39009c4ea6e409bfe6
477 476 $ hg debugstate --docket | grep ignore
478 477 ignore pattern hash: c0beb296395d48ced8e14f39009c4ea6e409bfe6
479 478
480 479 $ echo rel > .hg/testhgignorerel
481 480 $ hg status > /dev/null
482 481 $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
483 482 sha1=b8e63d3428ec38abc68baa27631516d5ec46b7fa
484 483 $ hg debugstate --docket | grep ignore
485 484 ignore pattern hash: b8e63d3428ec38abc68baa27631516d5ec46b7fa
486 485 $ cd ..
487 486
488 487 Check that the hash depends on the source of the hgignore patterns
489 488 (otherwise the context is lost and things like subinclude are cached improperly)
490 489
491 490 $ hg init ignore-collision
492 491 $ cd ignore-collision
493 492 $ echo > .hg/testhgignorerel
494 493
495 494 $ mkdir dir1/ dir1/subdir
496 495 $ touch dir1/subdir/f dir1/subdir/ignored1
497 496 $ echo 'ignored1' > dir1/.hgignore
498 497
499 498 $ mkdir dir2 dir2/subdir
500 499 $ touch dir2/subdir/f dir2/subdir/ignored2
501 500 $ echo 'ignored2' > dir2/.hgignore
502 501 $ echo 'subinclude:dir2/.hgignore' >> .hgignore
503 502 $ echo 'subinclude:dir1/.hgignore' >> .hgignore
504 503
505 504 $ hg commit -Aqm_
506 505
507 506 $ > dir1/.hgignore
508 507 $ echo 'ignored' > dir2/.hgignore
509 508 $ echo 'ignored1' >> dir2/.hgignore
510 509 $ hg status
511 510 M dir1/.hgignore
512 511 M dir2/.hgignore
513 512 ? dir1/subdir/ignored1
514 513
515 514 #endif
General Comments 0
You need to be logged in to leave comments. Login now