##// END OF EJS Templates
rust-filepatterns: export glob_to_re function...
Georges Racinet -
r52363:406b413e stable
parent child Browse files
Show More
@@ -1,874 +1,874 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{
12 12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 14 SliceExt,
15 15 },
16 16 FastHashMap, PatternError,
17 17 };
18 18 use lazy_static::lazy_static;
19 19 use regex::bytes::{NoExpand, Regex};
20 20 use std::ops::Deref;
21 21 use std::path::{Path, PathBuf};
22 22 use std::vec::Vec;
23 23
24 24 lazy_static! {
25 25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 27 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
28 28 for byte in to_escape {
29 29 v[*byte as usize].insert(0, b'\\');
30 30 }
31 31 v
32 32 };
33 33 }
34 34
/// Regex replacements for what follows a `*` in a glob, as
/// `(bytes following the star, regex to emit)` pairs.
///
/// These are matched in order; the last entry has an empty prefix and
/// therefore always matches.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    // `**/`
    (b"*/", b"(?:.*/)?"),
    // `**`
    (b"*", b".*"),
    // lone `*`
    (b"", b"[^/]*"),
];
38 38
39 39 #[derive(Debug, Clone, PartialEq, Eq)]
40 40 pub enum PatternSyntax {
41 41 /// A regular expression
42 42 Regexp,
43 43 /// Glob that matches at the front of the path
44 44 RootGlob,
45 45 /// Glob that matches at any suffix of the path (still anchored at
46 46 /// slashes)
47 47 Glob,
48 48 /// a path relative to repository root, which is matched recursively
49 49 Path,
50 50 /// a single exact path relative to repository root
51 51 FilePath,
52 52 /// A path relative to cwd
53 53 RelPath,
54 54 /// an unrooted glob (*.rs matches Rust files in all dirs)
55 55 RelGlob,
56 56 /// A regexp that needn't match the start of a name
57 57 RelRegexp,
58 58 /// A path relative to repository root, which is matched non-recursively
59 59 /// (will not match subdirectories)
60 60 RootFiles,
61 61 /// A file of patterns to read and include
62 62 Include,
63 63 /// A file of patterns to match against files under the same directory
64 64 SubInclude,
65 65 /// SubInclude with the result of parsing the included file
66 66 ///
67 67 /// Note: there is no ExpandedInclude because that expansion can be done
68 68 /// in place by replacing the Include pattern by the included patterns.
69 69 /// SubInclude requires more handling.
70 70 ///
71 71 /// Note: `Box` is used to minimize size impact on other enum variants
72 72 ExpandedSubInclude(Box<SubInclude>),
73 73 }
74 74
75 75 /// Transforms a glob pattern into a regex
76 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
76 pub fn glob_to_re(pat: &[u8]) -> Vec<u8> {
77 77 let mut input = pat;
78 78 let mut res: Vec<u8> = vec![];
79 79 let mut group_depth = 0;
80 80
81 81 while let Some((c, rest)) = input.split_first() {
82 82 input = rest;
83 83
84 84 match c {
85 85 b'*' => {
86 86 for (source, repl) in GLOB_REPLACEMENTS {
87 87 if let Some(rest) = input.drop_prefix(source) {
88 88 input = rest;
89 89 res.extend(*repl);
90 90 break;
91 91 }
92 92 }
93 93 }
94 94 b'?' => res.extend(b"."),
95 95 b'[' => {
96 96 match input.iter().skip(1).position(|b| *b == b']') {
97 97 None => res.extend(b"\\["),
98 98 Some(end) => {
99 99 // Account for the one we skipped
100 100 let end = end + 1;
101 101
102 102 res.extend(b"[");
103 103
104 104 for (i, b) in input[..end].iter().enumerate() {
105 105 if *b == b'!' && i == 0 {
106 106 res.extend(b"^")
107 107 } else if *b == b'^' && i == 0 {
108 108 res.extend(b"\\^")
109 109 } else if *b == b'\\' {
110 110 res.extend(b"\\\\")
111 111 } else {
112 112 res.push(*b)
113 113 }
114 114 }
115 115 res.extend(b"]");
116 116 input = &input[end + 1..];
117 117 }
118 118 }
119 119 }
120 120 b'{' => {
121 121 group_depth += 1;
122 122 res.extend(b"(?:")
123 123 }
124 124 b'}' if group_depth > 0 => {
125 125 group_depth -= 1;
126 126 res.extend(b")");
127 127 }
128 128 b',' if group_depth > 0 => res.extend(b"|"),
129 129 b'\\' => {
130 130 let c = {
131 131 if let Some((c, rest)) = input.split_first() {
132 132 input = rest;
133 133 c
134 134 } else {
135 135 c
136 136 }
137 137 };
138 138 res.extend(&RE_ESCAPE[*c as usize])
139 139 }
140 140 _ => res.extend(&RE_ESCAPE[*c as usize]),
141 141 }
142 142 }
143 143 res
144 144 }
145 145
146 146 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 147 pattern
148 148 .iter()
149 149 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 150 .collect()
151 151 }
152 152
153 153 pub fn parse_pattern_syntax(
154 154 kind: &[u8],
155 155 ) -> Result<PatternSyntax, PatternError> {
156 156 match kind {
157 157 b"re:" => Ok(PatternSyntax::Regexp),
158 158 b"path:" => Ok(PatternSyntax::Path),
159 159 b"filepath:" => Ok(PatternSyntax::FilePath),
160 160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 164 b"glob:" => Ok(PatternSyntax::Glob),
165 165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 166 b"include:" => Ok(PatternSyntax::Include),
167 167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 168 _ => Err(PatternError::UnsupportedSyntax(
169 169 String::from_utf8_lossy(kind).to_string(),
170 170 )),
171 171 }
172 172 }
173 173
174 174 lazy_static! {
175 175 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
176 176 }
177 177
178 178 /// Builds the regex that corresponds to the given pattern.
179 179 /// If within a `syntax: regexp` context, returns the pattern,
180 180 /// otherwise, returns the corresponding regex.
181 181 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
182 182 let IgnorePattern {
183 183 syntax, pattern, ..
184 184 } = entry;
185 185 if pattern.is_empty() {
186 186 return vec![];
187 187 }
188 188 match syntax {
189 189 PatternSyntax::Regexp => pattern.to_owned(),
190 190 PatternSyntax::RelRegexp => {
191 191 // The `regex` crate accepts `**` while `re2` and Python's `re`
192 192 // do not. Checking for `*` correctly triggers the same error all
193 193 // engines.
194 194 if pattern[0] == b'^'
195 195 || pattern[0] == b'*'
196 196 || pattern.starts_with(b".*")
197 197 {
198 198 return pattern.to_owned();
199 199 }
200 200 match FLAG_RE.find(pattern) {
201 201 Some(mat) => {
202 202 let s = mat.start();
203 203 let e = mat.end();
204 204 [
205 205 &b"(?"[..],
206 206 &pattern[s + 2..e - 1],
207 207 &b":"[..],
208 208 if pattern[e] == b'^'
209 209 || pattern[e] == b'*'
210 210 || pattern[e..].starts_with(b".*")
211 211 {
212 212 &b""[..]
213 213 } else {
214 214 &b".*"[..]
215 215 },
216 216 &pattern[e..],
217 217 &b")"[..],
218 218 ]
219 219 .concat()
220 220 }
221 221 None => [&b".*"[..], pattern].concat(),
222 222 }
223 223 }
224 224 PatternSyntax::Path | PatternSyntax::RelPath => {
225 225 if pattern == b"." {
226 226 return vec![];
227 227 }
228 228 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
229 229 }
230 230 PatternSyntax::RootFiles => {
231 231 let mut res = if pattern == b"." {
232 232 vec![]
233 233 } else {
234 234 // Pattern is a directory name.
235 235 [escape_pattern(pattern).as_slice(), b"/"].concat()
236 236 };
237 237
238 238 // Anything after the pattern must be a non-directory.
239 239 res.extend(b"[^/]+$");
240 240 res
241 241 }
242 242 PatternSyntax::RelGlob => {
243 243 let glob_re = glob_to_re(pattern);
244 244 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
245 245 [b".*", rest, glob_suffix].concat()
246 246 } else {
247 247 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
248 248 }
249 249 }
250 250 PatternSyntax::Glob | PatternSyntax::RootGlob => {
251 251 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
252 252 }
253 253 PatternSyntax::Include
254 254 | PatternSyntax::SubInclude
255 255 | PatternSyntax::ExpandedSubInclude(_)
256 256 | PatternSyntax::FilePath => unreachable!(),
257 257 }
258 258 }
259 259
/// Bytes whose presence means a glob cannot be treated as a plain literal
/// path.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
262 262
/// Lexically normalizes a `/`-separated path given as bytes: empty and `.`
/// components are removed and `..` components are resolved against the
/// preceding component where possible. The filesystem is never consulted.
///
/// An empty input or an empty result is returned as `.`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';
    const CURRENT_DIR: &[u8] = b".";
    const PARENT_DIR: &[u8] = b"..";

    if bytes.is_empty() {
        return CURRENT_DIR.to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading_slashes =
        match bytes.iter().position(|b| *b != SEP).unwrap_or(bytes.len()) {
            count if count > 2 => 1,
            count => count,
        };

    let mut kept: Vec<&[u8]> = Vec::new();
    for part in bytes.split(|b| *b == SEP) {
        if part.is_empty() || part == CURRENT_DIR {
            continue;
        }
        // A `..` is kept verbatim only when there is nothing it can cancel:
        // at the start of a relative path, or right after another kept `..`.
        let keep = part != PARENT_DIR
            || (leading_slashes == 0 && kept.is_empty())
            || kept.last().map_or(false, |top| *top == PARENT_DIR);
        if keep {
            kept.push(part);
        } else {
            // Cancels the previous component; a no-op directly under the
            // root.
            kept.pop();
        }
    }

    let mut normalized = vec![SEP; leading_slashes];
    normalized.extend(kept.join(&SEP));

    if normalized.is_empty() {
        CURRENT_DIR.to_vec()
    } else {
        normalized
    }
}
304 304
305 305 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
306 306 /// that don't need to be transformed into a regex.
307 307 pub fn build_single_regex(
308 308 entry: &IgnorePattern,
309 309 glob_suffix: &[u8],
310 310 ) -> Result<Option<Vec<u8>>, PatternError> {
311 311 let IgnorePattern {
312 312 pattern, syntax, ..
313 313 } = entry;
314 314 let pattern = match syntax {
315 315 PatternSyntax::RootGlob
316 316 | PatternSyntax::Path
317 317 | PatternSyntax::RelGlob
318 318 | PatternSyntax::RelPath
319 319 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
320 320 PatternSyntax::Include | PatternSyntax::SubInclude => {
321 321 return Err(PatternError::NonRegexPattern(entry.clone()))
322 322 }
323 323 _ => pattern.to_owned(),
324 324 };
325 325 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
326 326 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
327 327 if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
328 328 Ok(None)
329 329 } else {
330 330 let mut entry = entry.clone();
331 331 entry.pattern = pattern;
332 332 Ok(Some(_build_single_regex(&entry, glob_suffix)))
333 333 }
334 334 }
335 335
336 336 lazy_static! {
337 337 static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
338 338 let mut m = FastHashMap::default();
339 339
340 340 m.insert(b"re:".as_ref(), PatternSyntax::Regexp);
341 341 m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp);
342 342 m.insert(b"path:".as_ref(), PatternSyntax::Path);
343 343 m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath);
344 344 m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath);
345 345 m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFiles);
346 346 m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob);
347 347 m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp);
348 348 m.insert(b"glob:".as_ref(), PatternSyntax::Glob);
349 349 m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob);
350 350 m.insert(b"include:".as_ref(), PatternSyntax::Include);
351 351 m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude);
352 352
353 353 m
354 354 };
355 355 }
356 356
/// Non-fatal problems encountered while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// A `syntax:` line named an unknown syntax: (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file does not exist: file path
    NoSuchFile(PathBuf),
}
364 364
365 365 pub fn parse_one_pattern(
366 366 pattern: &[u8],
367 367 source: &Path,
368 368 default: PatternSyntax,
369 369 normalize: bool,
370 370 ) -> IgnorePattern {
371 371 let mut pattern_bytes: &[u8] = pattern;
372 372 let mut syntax = default;
373 373
374 374 for (s, val) in SYNTAXES.iter() {
375 375 if let Some(rest) = pattern_bytes.drop_prefix(s) {
376 376 syntax = val.clone();
377 377 pattern_bytes = rest;
378 378 break;
379 379 }
380 380 }
381 381
382 382 let pattern = match syntax {
383 383 PatternSyntax::RootGlob
384 384 | PatternSyntax::Path
385 385 | PatternSyntax::Glob
386 386 | PatternSyntax::RelGlob
387 387 | PatternSyntax::RelPath
388 388 | PatternSyntax::RootFiles
389 389 if normalize =>
390 390 {
391 391 normalize_path_bytes(pattern_bytes)
392 392 }
393 393 _ => pattern_bytes.to_vec(),
394 394 };
395 395
396 396 IgnorePattern {
397 397 syntax,
398 398 pattern,
399 399 source: source.to_owned(),
400 400 }
401 401 }
402 402
403 403 pub fn parse_pattern_file_contents(
404 404 lines: &[u8],
405 405 file_path: &Path,
406 406 default_syntax_override: Option<PatternSyntax>,
407 407 warn: bool,
408 408 relativize: bool,
409 409 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
410 410 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
411 411
412 412 #[allow(clippy::trivial_regex)]
413 413 let comment_escape_regex = Regex::new(r"\\#").unwrap();
414 414 let mut inputs: Vec<IgnorePattern> = vec![];
415 415 let mut warnings: Vec<PatternFileWarning> = vec![];
416 416
417 417 let mut current_syntax =
418 418 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
419 419
420 420 for mut line in lines.split(|c| *c == b'\n') {
421 421 let line_buf;
422 422 if line.contains(&b'#') {
423 423 if let Some(cap) = comment_regex.captures(line) {
424 424 line = &line[..cap.get(1).unwrap().end()]
425 425 }
426 426 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
427 427 line = &line_buf;
428 428 }
429 429
430 430 let line = line.trim_end();
431 431
432 432 if line.is_empty() {
433 433 continue;
434 434 }
435 435
436 436 if let Some(syntax) = line.drop_prefix(b"syntax:") {
437 437 let syntax = syntax.trim();
438 438
439 439 if let Some(parsed) =
440 440 SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
441 441 {
442 442 current_syntax = parsed.clone();
443 443 } else if warn {
444 444 warnings.push(PatternFileWarning::InvalidSyntax(
445 445 file_path.to_owned(),
446 446 syntax.to_owned(),
447 447 ));
448 448 }
449 449 } else {
450 450 let pattern = parse_one_pattern(
451 451 line,
452 452 file_path,
453 453 current_syntax.clone(),
454 454 false,
455 455 );
456 456 inputs.push(if relativize {
457 457 pattern.to_relative()
458 458 } else {
459 459 pattern
460 460 })
461 461 }
462 462 }
463 463 Ok((inputs, warnings))
464 464 }
465 465
466 466 pub fn parse_pattern_args(
467 467 patterns: Vec<Vec<u8>>,
468 468 cwd: &Path,
469 469 root: &Path,
470 470 ) -> Result<Vec<IgnorePattern>, HgPathError> {
471 471 let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
472 472 for pattern in patterns {
473 473 let pattern = parse_one_pattern(
474 474 &pattern,
475 475 Path::new("<args>"),
476 476 PatternSyntax::RelPath,
477 477 true,
478 478 );
479 479 match pattern.syntax {
480 480 PatternSyntax::RelGlob | PatternSyntax::RelPath => {
481 481 let name = get_path_from_bytes(&pattern.pattern);
482 482 let canon = canonical_path(root, cwd, name)?;
483 483 ignore_patterns.push(IgnorePattern {
484 484 syntax: pattern.syntax,
485 485 pattern: get_bytes_from_path(canon),
486 486 source: pattern.source,
487 487 })
488 488 }
489 489 _ => ignore_patterns.push(pattern.to_owned()),
490 490 };
491 491 }
492 492 Ok(ignore_patterns)
493 493 }
494 494
495 495 pub fn read_pattern_file(
496 496 file_path: &Path,
497 497 warn: bool,
498 498 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
499 499 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
500 500 match std::fs::read(file_path) {
501 501 Ok(contents) => {
502 502 inspect_pattern_bytes(file_path, &contents);
503 503 parse_pattern_file_contents(&contents, file_path, None, warn, true)
504 504 }
505 505 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
506 506 vec![],
507 507 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
508 508 )),
509 509 Err(e) => Err(e.into()),
510 510 }
511 511 }
512 512
513 513 /// Represents an entry in an "ignore" file.
514 514 #[derive(Debug, Eq, PartialEq, Clone)]
515 515 pub struct IgnorePattern {
516 516 pub syntax: PatternSyntax,
517 517 pub pattern: Vec<u8>,
518 518 pub source: PathBuf,
519 519 }
520 520
521 521 impl IgnorePattern {
522 522 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
523 523 Self {
524 524 syntax,
525 525 pattern: pattern.to_owned(),
526 526 source: source.to_owned(),
527 527 }
528 528 }
529 529
530 530 pub fn to_relative(self) -> Self {
531 531 let Self {
532 532 syntax,
533 533 pattern,
534 534 source,
535 535 } = self;
536 536 Self {
537 537 syntax: match syntax {
538 538 PatternSyntax::Regexp => PatternSyntax::RelRegexp,
539 539 PatternSyntax::Glob => PatternSyntax::RelGlob,
540 540 x => x,
541 541 },
542 542 pattern,
543 543 source,
544 544 }
545 545 }
546 546 }
547 547
548 548 pub type PatternResult<T> = Result<T, PatternError>;
549 549
550 550 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
551 551 /// and `subinclude:` patterns.
552 552 ///
553 553 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
554 554 /// is used for the latter to form a tree of patterns.
555 555 pub fn get_patterns_from_file(
556 556 pattern_file: &Path,
557 557 root_dir: &Path,
558 558 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
559 559 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
560 560 let (patterns, mut warnings) =
561 561 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
562 562 let patterns = patterns
563 563 .into_iter()
564 564 .flat_map(|entry| -> PatternResult<_> {
565 565 Ok(match &entry.syntax {
566 566 PatternSyntax::Include => {
567 567 let inner_include =
568 568 root_dir.join(get_path_from_bytes(&entry.pattern));
569 569 let (inner_pats, inner_warnings) = get_patterns_from_file(
570 570 &inner_include,
571 571 root_dir,
572 572 inspect_pattern_bytes,
573 573 )?;
574 574 warnings.extend(inner_warnings);
575 575 inner_pats
576 576 }
577 577 PatternSyntax::SubInclude => {
578 578 let mut sub_include = SubInclude::new(
579 579 root_dir,
580 580 &entry.pattern,
581 581 &entry.source,
582 582 )?;
583 583 let (inner_patterns, inner_warnings) =
584 584 get_patterns_from_file(
585 585 &sub_include.path,
586 586 &sub_include.root,
587 587 inspect_pattern_bytes,
588 588 )?;
589 589 sub_include.included_patterns = inner_patterns;
590 590 warnings.extend(inner_warnings);
591 591 vec![IgnorePattern {
592 592 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
593 593 sub_include,
594 594 )),
595 595 ..entry
596 596 }]
597 597 }
598 598 _ => vec![entry],
599 599 })
600 600 })
601 601 .flatten()
602 602 .collect();
603 603
604 604 Ok((patterns, warnings))
605 605 }
606 606
607 607 /// Holds all the information needed to handle a `subinclude:` pattern.
608 608 #[derive(Debug, PartialEq, Eq, Clone)]
609 609 pub struct SubInclude {
610 610 /// Will be used for repository (hg) paths that start with this prefix.
611 611 /// It is relative to the current working directory, so comparing against
612 612 /// repository paths is painless.
613 613 pub prefix: HgPathBuf,
614 614 /// The file itself, containing the patterns
615 615 pub path: PathBuf,
616 616 /// Folder in the filesystem where this it applies
617 617 pub root: PathBuf,
618 618
619 619 pub included_patterns: Vec<IgnorePattern>,
620 620 }
621 621
622 622 impl SubInclude {
623 623 pub fn new(
624 624 root_dir: &Path,
625 625 pattern: &[u8],
626 626 source: &Path,
627 627 ) -> Result<SubInclude, HgPathError> {
628 628 let normalized_source =
629 629 normalize_path_bytes(&get_bytes_from_path(source));
630 630
631 631 let source_root = get_path_from_bytes(&normalized_source);
632 632 let source_root = source_root.parent().unwrap_or(source_root);
633 633
634 634 let path = source_root.join(get_path_from_bytes(pattern));
635 635 let new_root = path.parent().unwrap_or_else(|| path.deref());
636 636
637 637 let prefix = canonical_path(root_dir, root_dir, new_root)?;
638 638
639 639 Ok(Self {
640 640 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
641 641 if !p.is_empty() {
642 642 p.push_byte(b'/');
643 643 }
644 644 p
645 645 })?,
646 646 path: path.to_owned(),
647 647 root: new_root.to_owned(),
648 648 included_patterns: Vec::new(),
649 649 })
650 650 }
651 651 }
652 652
653 653 /// Separate and pre-process subincludes from other patterns for the "ignore"
654 654 /// phase.
655 655 pub fn filter_subincludes(
656 656 ignore_patterns: Vec<IgnorePattern>,
657 657 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
658 658 let mut subincludes = vec![];
659 659 let mut others = vec![];
660 660
661 661 for pattern in ignore_patterns {
662 662 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
663 663 {
664 664 subincludes.push(*sub_include);
665 665 } else {
666 666 others.push(pattern)
667 667 }
668 668 }
669 669 Ok((subincludes, others))
670 670 }
671 671
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Glob suffix passed to `build_single_regex` by every test below.
    const GLOB_SUFFIX: &[u8] = b"(?:/|$)";

    /// Builds the regex for a pattern with an empty source path.
    fn regex_for(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
        let entry = IgnorePattern::new(syntax, pattern, Path::new(""));
        build_single_regex(&entry, GLOB_SUFFIX).unwrap()
    }

    /// Parses `lines` as the relativized contents of `file_path`, without
    /// warnings, and returns the patterns only.
    fn parsed_patterns(lines: &[u8]) -> Vec<IgnorePattern> {
        parse_pattern_file_contents(
            lines,
            Path::new("file_path"),
            None,
            false,
            true,
        )
        .unwrap()
        .0
    }

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
            br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
        );
    }

    #[test]
    fn glob_test() {
        let cases: &[(&[u8], &[u8])] = &[
            (br"?", br"."),
            (br"*", br"[^/]*"),
            (br"**", br".*"),
            (br"**/a", br"(?:.*/)?a"),
            (br"a/**/b", br"a/(?:.*/)?b"),
            (br"[a*?!^][^b][!c]", br"[a*?!^][\^b][^c]"),
            (br"{a,b}", br"(?:a|b)"),
            (br".\*\?", br"\.\*\?"),
        ];
        for (glob, expected) in cases {
            assert_eq!(glob_to_re(glob), expected.to_vec());
        }
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        assert_eq!(
            parsed_patterns(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        assert_eq!(parsed_patterns(b"syntax: include\nsyntax: glob"), vec![]);

        assert_eq!(
            parsed_patterns(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            regex_for(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(regex_for(PatternSyntax::RootGlob, b""), None);
        assert_eq!(regex_for(PatternSyntax::RootGlob, b"whatever"), None);
        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_relregex() {
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"^ba{2}r"),
            Some(b"^ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"ba{2}r"),
            Some(b".*ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)ba{2}r"),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)^ba{2}r"),
            Some(b"(?ia:^ba{2}r)".to_vec()),
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now