##// END OF EJS Templates
rust-matchers: use the `regex` crate...
Raphaël Gomès -
r45084:496868f1 default
parent child Browse files
Show More
@@ -176,9 +176,14 b' fn _build_single_regex(entry: &IgnorePat'
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 PatternSyntax::Regexp => pattern.to_owned(),
179 // The `regex` crate adds `.*` to the start and end of expressions
180 // if there are no anchors, so add them.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
180 PatternSyntax::RelRegexp => {
182 PatternSyntax::RelRegexp => {
181 if pattern[0] == b'^' {
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error in all
185 // engines.
186 if pattern[0] == b'^' || pattern[0] == b'*' {
182 return pattern.to_owned();
187 return pattern.to_owned();
183 }
188 }
184 [&b".*"[..], pattern].concat()
189 [&b".*"[..], pattern].concat()
@@ -191,14 +196,15 b' fn _build_single_regex(entry: &IgnorePat'
191 }
196 }
192 PatternSyntax::RootFiles => {
197 PatternSyntax::RootFiles => {
193 let mut res = if pattern == b"." {
198 let mut res = if pattern == b"." {
194 vec![]
199 vec![b'^']
195 } else {
200 } else {
196 // Pattern is a directory name.
201 // Pattern is a directory name.
197 [escape_pattern(pattern).as_slice(), b"/"].concat()
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
198 };
203 };
199
204
200 // Anything after the pattern must be a non-directory.
205 // Anything after the pattern must be a non-directory.
201 res.extend(b"[^/]+$");
206 res.extend(b"[^/]+$");
207 res.push(b'$');
202 res
208 res
203 }
209 }
204 PatternSyntax::RelGlob => {
210 PatternSyntax::RelGlob => {
@@ -206,11 +212,11 b' fn _build_single_regex(entry: &IgnorePat'
206 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
207 [b".*", rest, GLOB_SUFFIX].concat()
213 [b".*", rest, GLOB_SUFFIX].concat()
208 } else {
214 } else {
209 [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
210 }
216 }
211 }
217 }
212 PatternSyntax::Glob | PatternSyntax::RootGlob => {
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
213 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
214 }
220 }
215 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
216 }
222 }
@@ -282,7 +288,10 b' pub fn build_single_regex('
282 if *syntax == PatternSyntax::RootGlob
288 if *syntax == PatternSyntax::RootGlob
283 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
284 {
290 {
285 let mut escaped = escape_pattern(&pattern);
291 // The `regex` crate adds `.*` to the start and end of expressions
292 // if there are no anchors, so add the start anchor.
293 let mut escaped = vec![b'^'];
294 escaped.extend(escape_pattern(&pattern));
286 escaped.extend(GLOB_SUFFIX);
295 escaped.extend(GLOB_SUFFIX);
287 Ok(escaped)
296 Ok(escaped)
288 } else {
297 } else {
@@ -619,7 +628,7 b' mod tests {'
619 Path::new("")
628 Path::new("")
620 ))
629 ))
621 .unwrap(),
630 .unwrap(),
622 br"(?:|.*/)rust/target(?:/|$)".to_vec(),
631 br"(?:.*/)?rust/target(?:/|$)".to_vec(),
623 );
632 );
624 }
633 }
625
634
@@ -632,7 +641,7 b' mod tests {'
632 Path::new("")
641 Path::new("")
633 ))
642 ))
634 .unwrap(),
643 .unwrap(),
635 br"\.(?:/|$)".to_vec(),
644 br"^\.(?:/|$)".to_vec(),
636 );
645 );
637 assert_eq!(
646 assert_eq!(
638 build_single_regex(&IgnorePattern::new(
647 build_single_regex(&IgnorePattern::new(
@@ -641,7 +650,7 b' mod tests {'
641 Path::new("")
650 Path::new("")
642 ))
651 ))
643 .unwrap(),
652 .unwrap(),
644 br"whatever(?:/|$)".to_vec(),
653 br"^whatever(?:/|$)".to_vec(),
645 );
654 );
646 assert_eq!(
655 assert_eq!(
647 build_single_regex(&IgnorePattern::new(
656 build_single_regex(&IgnorePattern::new(
@@ -650,7 +659,7 b' mod tests {'
650 Path::new("")
659 Path::new("")
651 ))
660 ))
652 .unwrap(),
661 .unwrap(),
653 br"[^/]*\.o(?:/|$)".to_vec(),
662 br"^[^/]*\.o(?:/|$)".to_vec(),
654 );
663 );
655 }
664 }
656 }
665 }
@@ -331,8 +331,37 b' fn re_matcher('
331 }
331 }
332
332
333 #[cfg(not(feature = "with-re2"))]
333 #[cfg(not(feature = "with-re2"))]
334 fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> {
334 /// Returns a function that matches an `HgPath` against the given regex
335 Err(PatternError::Re2NotInstalled)
335 /// pattern.
336 ///
337 /// This can fail when the pattern is invalid or not supported by the
338 /// underlying engine (the `regex` crate), for instance anything with
339 /// back-references.
340 fn re_matcher(
341 pattern: &[u8],
342 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
343 use std::io::Write;
344
345 let mut escaped_bytes = vec![];
346 for byte in pattern {
347 if *byte > 127 {
348 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
349 } else {
350 escaped_bytes.push(*byte);
351 }
352 }
353
354 // Avoid the cost of UTF8 checking
355 //
356 // # Safety
357 // This is safe because we escaped all non-ASCII bytes.
358 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
359 let re = regex::bytes::RegexBuilder::new(&pattern_string)
360 .unicode(false)
361 .build()
362 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
363
364 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
336 }
365 }
337
366
338 /// Returns the regex pattern and a function that matches an `HgPath` against
367 /// Returns the regex pattern and a function that matches an `HgPath` against
General Comments 0
You need to be logged in to leave comments. Login now