Show More
@@ -176,9 +176,14 b' fn _build_single_regex(entry: &IgnorePat' | |||||
176 | return vec![]; |
|
176 | return vec![]; | |
177 | } |
|
177 | } | |
178 | match syntax { |
|
178 | match syntax { | |
179 | PatternSyntax::Regexp => pattern.to_owned(), |
|
179 | // The `regex` crate adds `.*` to the start and end of expressions | |
|
180 | // if there are no anchors, so add them. | |||
|
181 | PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(), | |||
180 | PatternSyntax::RelRegexp => { |
|
182 | PatternSyntax::RelRegexp => { | |
181 | if pattern[0] == b'^' { |
|
183 | // The `regex` crate accepts `**` while `re2` and Python's `re` | |
|
184 | // do not. Checking for `*` correctly triggers the same error in all | |||
|
185 | // engines. | |||
|
186 | if pattern[0] == b'^' || pattern[0] == b'*' { | |||
182 | return pattern.to_owned(); |
|
187 | return pattern.to_owned(); | |
183 | } |
|
188 | } | |
184 | [&b".*"[..], pattern].concat() |
|
189 | [&b".*"[..], pattern].concat() | |
@@ -191,14 +196,15 b' fn _build_single_regex(entry: &IgnorePat' | |||||
191 | } |
|
196 | } | |
192 | PatternSyntax::RootFiles => { |
|
197 | PatternSyntax::RootFiles => { | |
193 | let mut res = if pattern == b"." { |
|
198 | let mut res = if pattern == b"." { | |
194 | vec![] |
|
199 | vec![b'^'] | |
195 | } else { |
|
200 | } else { | |
196 | // Pattern is a directory name. |
|
201 | // Pattern is a directory name. | |
197 | [escape_pattern(pattern).as_slice(), b"/"].concat() |
|
202 | [b"^", escape_pattern(pattern).as_slice(), b"/"].concat() | |
198 | }; |
|
203 | }; | |
199 |
|
204 | |||
200 | // Anything after the pattern must be a non-directory. |
|
205 | // Anything after the pattern must be a non-directory. | |
201 | res.extend(b"[^/]+$"); |
|
206 | res.extend(b"[^/]+$"); | |
|
207 | res.push(b'$'); | |||
202 | res |
|
208 | res | |
203 | } |
|
209 | } | |
204 | PatternSyntax::RelGlob => { |
|
210 | PatternSyntax::RelGlob => { | |
@@ -206,11 +212,11 b' fn _build_single_regex(entry: &IgnorePat' | |||||
206 | if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { |
|
212 | if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { | |
207 | [b".*", rest, GLOB_SUFFIX].concat() |
|
213 | [b".*", rest, GLOB_SUFFIX].concat() | |
208 | } else { |
|
214 | } else { | |
209 | [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat() |
|
215 | [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat() | |
210 | } |
|
216 | } | |
211 | } |
|
217 | } | |
212 | PatternSyntax::Glob | PatternSyntax::RootGlob => { |
|
218 | PatternSyntax::Glob | PatternSyntax::RootGlob => { | |
213 | [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() |
|
219 | [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat() | |
214 | } |
|
220 | } | |
215 | PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), |
|
221 | PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(), | |
216 | } |
|
222 | } | |
@@ -282,7 +288,10 b' pub fn build_single_regex(' | |||||
282 | if *syntax == PatternSyntax::RootGlob |
|
288 | if *syntax == PatternSyntax::RootGlob | |
283 | && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) |
|
289 | && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)) | |
284 | { |
|
290 | { | |
285 | let mut escaped = escape_pattern(&pattern); |
|
291 | // The `regex` crate adds `.*` to the start and end of expressions | |
|
292 | // if there are no anchors, so add the start anchor. | |||
|
293 | let mut escaped = vec![b'^']; | |||
|
294 | escaped.extend(escape_pattern(&pattern)); | |||
286 | escaped.extend(GLOB_SUFFIX); |
|
295 | escaped.extend(GLOB_SUFFIX); | |
287 | Ok(escaped) |
|
296 | Ok(escaped) | |
288 | } else { |
|
297 | } else { | |
@@ -619,7 +628,7 b' mod tests {' | |||||
619 | Path::new("") |
|
628 | Path::new("") | |
620 | )) |
|
629 | )) | |
621 | .unwrap(), |
|
630 | .unwrap(), | |
622 | br"(?:|.*/)rust/target(?:/|$)".to_vec(), |
|
631 | br"(?:.*/)?rust/target(?:/|$)".to_vec(), | |
623 | ); |
|
632 | ); | |
624 | } |
|
633 | } | |
625 |
|
634 | |||
@@ -632,7 +641,7 b' mod tests {' | |||||
632 | Path::new("") |
|
641 | Path::new("") | |
633 | )) |
|
642 | )) | |
634 | .unwrap(), |
|
643 | .unwrap(), | |
635 | br"\.(?:/|$)".to_vec(), |
|
644 | br"^\.(?:/|$)".to_vec(), | |
636 | ); |
|
645 | ); | |
637 | assert_eq!( |
|
646 | assert_eq!( | |
638 | build_single_regex(&IgnorePattern::new( |
|
647 | build_single_regex(&IgnorePattern::new( | |
@@ -641,7 +650,7 b' mod tests {' | |||||
641 | Path::new("") |
|
650 | Path::new("") | |
642 | )) |
|
651 | )) | |
643 | .unwrap(), |
|
652 | .unwrap(), | |
644 | br"whatever(?:/|$)".to_vec(), |
|
653 | br"^whatever(?:/|$)".to_vec(), | |
645 | ); |
|
654 | ); | |
646 | assert_eq!( |
|
655 | assert_eq!( | |
647 | build_single_regex(&IgnorePattern::new( |
|
656 | build_single_regex(&IgnorePattern::new( | |
@@ -650,7 +659,7 b' mod tests {' | |||||
650 | Path::new("") |
|
659 | Path::new("") | |
651 | )) |
|
660 | )) | |
652 | .unwrap(), |
|
661 | .unwrap(), | |
653 | br"[^/]*\.o(?:/|$)".to_vec(), |
|
662 | br"^[^/]*\.o(?:/|$)".to_vec(), | |
654 | ); |
|
663 | ); | |
655 | } |
|
664 | } | |
656 | } |
|
665 | } |
@@ -331,8 +331,37 b' fn re_matcher(' | |||||
331 | } |
|
331 | } | |
332 |
|
332 | |||
333 | #[cfg(not(feature = "with-re2"))] |
|
333 | #[cfg(not(feature = "with-re2"))] | |
334 | fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> { |
|
334 | /// Returns a function that matches an `HgPath` against the given regex | |
335 | Err(PatternError::Re2NotInstalled) |
|
335 | /// pattern. | |
|
336 | /// | |||
|
337 | /// This can fail when the pattern is invalid or not supported by the | |||
|
338 | /// underlying engine (the `regex` crate), for instance anything with | |||
|
339 | /// back-references. | |||
|
340 | fn re_matcher( | |||
|
341 | pattern: &[u8], | |||
|
342 | ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> { | |||
|
343 | use std::io::Write; | |||
|
344 | ||||
|
345 | let mut escaped_bytes = vec![]; | |||
|
346 | for byte in pattern { | |||
|
347 | if *byte > 127 { | |||
|
348 | write!(escaped_bytes, "\\x{:x}", *byte).unwrap(); | |||
|
349 | } else { | |||
|
350 | escaped_bytes.push(*byte); | |||
|
351 | } | |||
|
352 | } | |||
|
353 | ||||
|
354 | // Avoid the cost of UTF8 checking | |||
|
355 | // | |||
|
356 | // # Safety | |||
|
357 | // This is safe because we escaped all non-ASCII bytes. | |||
|
358 | let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; | |||
|
359 | let re = regex::bytes::RegexBuilder::new(&pattern_string) | |||
|
360 | .unicode(false) | |||
|
361 | .build() | |||
|
362 | .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; | |||
|
363 | ||||
|
364 | Ok(move |path: &HgPath| re.is_match(path.as_bytes())) | |||
336 | } |
|
365 | } | |
337 |
|
366 | |||
338 | /// Returns the regex pattern and a function that matches an `HgPath` against |
|
367 | /// Returns the regex pattern and a function that matches an `HgPath` against |
General Comments 0
You need to be logged in to leave comments.
Login now