Show More
@@ -176,9 +176,14 b' fn _build_single_regex(entry: &IgnorePat' | |||
|
176 | 176 | return vec![]; |
|
177 | 177 | } |
|
178 | 178 | match syntax { |
|
179 | PatternSyntax::Regexp => pattern.to_owned(), | |
|
179 | // The `regex` crate adds `.*` to the start and end of expressions | |
|
180 | // if there are no anchors, so add them. | |
|
181 | PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(), | |
|
180 | 182 | PatternSyntax::RelRegexp => { |
|
181 | if pattern[0] == b'^' { | |
|
183 | // The `regex` crate accepts `**` while `re2` and Python's `re` | |
|
184 | // do not. Checking for `*` correctly triggers the same error in all | |
|
185 | // engines. | |
|
186 | if pattern[0] == b'^' || pattern[0] == b'*' { | |
|
182 | 187 | return pattern.to_owned(); |
|
183 | 188 | } |
|
184 | 189 | [&b".*"[..], pattern].concat() |
@@ -191,14 +196,15 b' fn _build_single_regex(entry: &IgnorePat' | |||
|
191 | 196 | } |
|
192 | 197 | PatternSyntax::RootFiles => { |
|
193 | 198 | let mut res = if pattern == b"." { |
|
194 | vec![] | |
|
199 | vec![b'^'] | |
|
195 | 200 | } else { |
|
196 | 201 | // Pattern is a directory name. |
|
197 | [escape_pattern(pattern).as_slice(), b"/"].concat() | |
|
202 | [b"^", escape_pattern(pattern).as_slice(), b"/"].concat() | |
|
198 | 203 | }; |
|
199 | 204 | |
|
200 | 205 | // Anything after the pattern must be a non-directory. |
|
201 | 206 | res.extend(b"[^/]+$"); |
|
207 | res.push(b'$'); | |
|
202 | 208 | res |
|
203 | 209 | } |
|
204 | 210 | PatternSyntax::RelGlob => { |
@@ -206,11 +212,11 b' fn _build_single_regex(entry: &IgnorePat' | |||
|
206 | 212 | if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
|
207 | 213 | [b".*", rest, GLOB_SUFFIX].concat() |
|
208 | 214 | } else { |
|
209 | [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat() | |
|
215 | [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() | |
|
210 | 216 | } |
|
211 | 217 | } |
|
212 | 218 | PatternSyntax::Glob | PatternSyntax::RootGlob => { |
|
213 | [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() | |
|
219 | [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() | |
|
214 | 220 | } |
|
215 | 221 | PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), |
|
216 | 222 | } |
@@ -282,7 +288,10 b' pub fn build_single_regex(' | |||
|
282 | 288 | if *syntax == PatternSyntax::RootGlob |
|
283 | 289 | && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) |
|
284 | 290 | { |
|
285 | let mut escaped = escape_pattern(&pattern); | |
|
291 | // The `regex` crate adds `.*` to the start and end of expressions | |
|
292 | // if there are no anchors, so add the start anchor. | |
|
293 | let mut escaped = vec![b'^']; | |
|
294 | escaped.extend(escape_pattern(&pattern)); | |
|
286 | 295 | escaped.extend(GLOB_SUFFIX); |
|
287 | 296 | Ok(escaped) |
|
288 | 297 | } else { |
@@ -619,7 +628,7 b' mod tests {' | |||
|
619 | 628 | Path::new("") |
|
620 | 629 | )) |
|
621 | 630 | .unwrap(), |
|
622 | br"(?:|.*/)rust/target(?:/|$)".to_vec(), | |
|
631 | br"(?:.*/)?rust/target(?:/|$)".to_vec(), | |
|
623 | 632 | ); |
|
624 | 633 | } |
|
625 | 634 | |
@@ -632,7 +641,7 b' mod tests {' | |||
|
632 | 641 | Path::new("") |
|
633 | 642 | )) |
|
634 | 643 | .unwrap(), |
|
635 | br"\.(?:/|$)".to_vec(), | |
|
644 | br"^\.(?:/|$)".to_vec(), | |
|
636 | 645 | ); |
|
637 | 646 | assert_eq!( |
|
638 | 647 | build_single_regex(&IgnorePattern::new( |
@@ -641,7 +650,7 b' mod tests {' | |||
|
641 | 650 | Path::new("") |
|
642 | 651 | )) |
|
643 | 652 | .unwrap(), |
|
644 | br"whatever(?:/|$)".to_vec(), | |
|
653 | br"^whatever(?:/|$)".to_vec(), | |
|
645 | 654 | ); |
|
646 | 655 | assert_eq!( |
|
647 | 656 | build_single_regex(&IgnorePattern::new( |
@@ -650,7 +659,7 b' mod tests {' | |||
|
650 | 659 | Path::new("") |
|
651 | 660 | )) |
|
652 | 661 | .unwrap(), |
|
653 | br"[^/]*\.o(?:/|$)".to_vec(), | |
|
662 | br"^[^/]*\.o(?:/|$)".to_vec(), | |
|
654 | 663 | ); |
|
655 | 664 | } |
|
656 | 665 | } |
@@ -331,8 +331,37 b' fn re_matcher(' | |||
|
331 | 331 | } |
|
332 | 332 | |
|
333 | 333 | #[cfg(not(feature = "with-re2"))] |
|
334 | fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> { | |
|
335 | Err(PatternError::Re2NotInstalled) | |
|
334 | /// Returns a function that matches an `HgPath` against the given regex | |
|
335 | /// pattern. | |
|
336 | /// | |
|
337 | /// This can fail when the pattern is invalid or not supported by the | |
|
338 | /// underlying engine (the `regex` crate), for instance anything with | |
|
339 | /// back-references. | |
|
340 | fn re_matcher( | |
|
341 | pattern: &[u8], | |
|
342 | ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> { | |
|
343 | use std::io::Write; | |
|
344 | ||
|
345 | let mut escaped_bytes = vec![]; | |
|
346 | for byte in pattern { | |
|
347 | if *byte > 127 { | |
|
348 | write!(escaped_bytes, "\\x{:x}", *byte).unwrap(); | |
|
349 | } else { | |
|
350 | escaped_bytes.push(*byte); | |
|
351 | } | |
|
352 | } | |
|
353 | ||
|
354 | // Avoid the cost of UTF8 checking | |
|
355 | // | |
|
356 | // # Safety | |
|
357 | // This is safe because we escaped all non-ASCII bytes. | |
|
358 | let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; | |
|
359 | let re = regex::bytes::RegexBuilder::new(&pattern_string) | |
|
360 | .unicode(false) | |
|
361 | .build() | |
|
362 | .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; | |
|
363 | ||
|
364 | Ok(move |path: &HgPath| re.is_match(path.as_bytes())) | |
|
336 | 365 | } |
|
337 | 366 | |
|
338 | 367 | /// Returns the regex pattern and a function that matches an `HgPath` against |
General Comments 0
You need to be logged in to leave comments.
Login now