##// END OF EJS Templates
rust-matchers: use the `regex` crate...
Raphaël Gomès -
r45084:496868f1 default
parent child Browse files
Show More
@@ -176,9 +176,14 b' fn _build_single_regex(entry: &IgnorePat'
176 176 return vec![];
177 177 }
178 178 match syntax {
179 PatternSyntax::Regexp => pattern.to_owned(),
179 // The `regex` crate adds `.*` to the start and end of expressions
180 // if there are no anchors, so add both anchors explicitly.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
180 182 PatternSyntax::RelRegexp => {
181 if pattern[0] == b'^' {
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error in all
185 // engines.
186 if pattern[0] == b'^' || pattern[0] == b'*' {
182 187 return pattern.to_owned();
183 188 }
184 189 [&b".*"[..], pattern].concat()
@@ -191,14 +196,15 b' fn _build_single_regex(entry: &IgnorePat'
191 196 }
192 197 PatternSyntax::RootFiles => {
193 198 let mut res = if pattern == b"." {
194 vec![]
199 vec![b'^']
195 200 } else {
196 201 // Pattern is a directory name.
197 [escape_pattern(pattern).as_slice(), b"/"].concat()
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
198 203 };
199 204
200 205 // Anything after the pattern must be a non-directory.
201 206 res.extend(b"[^/]+$");
207 res.push(b'$');
202 208 res
203 209 }
204 210 PatternSyntax::RelGlob => {
@@ -206,11 +212,11 b' fn _build_single_regex(entry: &IgnorePat'
206 212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
207 213 [b".*", rest, GLOB_SUFFIX].concat()
208 214 } else {
209 [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
210 216 }
211 217 }
212 218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
213 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
214 220 }
215 221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
216 222 }
@@ -282,7 +288,10 b' pub fn build_single_regex('
282 288 if *syntax == PatternSyntax::RootGlob
283 289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
284 290 {
285 let mut escaped = escape_pattern(&pattern);
291 // The `regex` crate adds `.*` to the start and end of expressions
292 // if there are no anchors, so add the start anchor.
293 let mut escaped = vec![b'^'];
294 escaped.extend(escape_pattern(&pattern));
286 295 escaped.extend(GLOB_SUFFIX);
287 296 Ok(escaped)
288 297 } else {
@@ -619,7 +628,7 b' mod tests {'
619 628 Path::new("")
620 629 ))
621 630 .unwrap(),
622 br"(?:|.*/)rust/target(?:/|$)".to_vec(),
631 br"(?:.*/)?rust/target(?:/|$)".to_vec(),
623 632 );
624 633 }
625 634
@@ -632,7 +641,7 b' mod tests {'
632 641 Path::new("")
633 642 ))
634 643 .unwrap(),
635 br"\.(?:/|$)".to_vec(),
644 br"^\.(?:/|$)".to_vec(),
636 645 );
637 646 assert_eq!(
638 647 build_single_regex(&IgnorePattern::new(
@@ -641,7 +650,7 b' mod tests {'
641 650 Path::new("")
642 651 ))
643 652 .unwrap(),
644 br"whatever(?:/|$)".to_vec(),
653 br"^whatever(?:/|$)".to_vec(),
645 654 );
646 655 assert_eq!(
647 656 build_single_regex(&IgnorePattern::new(
@@ -650,7 +659,7 b' mod tests {'
650 659 Path::new("")
651 660 ))
652 661 .unwrap(),
653 br"[^/]*\.o(?:/|$)".to_vec(),
662 br"^[^/]*\.o(?:/|$)".to_vec(),
654 663 );
655 664 }
656 665 }
@@ -331,8 +331,37 b' fn re_matcher('
331 331 }
332 332
333 333 #[cfg(not(feature = "with-re2"))]
334 fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> {
335 Err(PatternError::Re2NotInstalled)
334 /// Returns a function that matches an `HgPath` against the given regex
335 /// pattern.
336 ///
337 /// This can fail when the pattern is invalid or not supported by the
338 /// underlying engine (the `regex` crate), for instance anything with
339 /// back-references.
340 fn re_matcher(
341 pattern: &[u8],
342 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
343 use std::io::Write;
344
345 let mut escaped_bytes = vec![];
346 for byte in pattern {
347 if *byte > 127 {
348 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
349 } else {
350 escaped_bytes.push(*byte);
351 }
352 }
353
354 // Avoid the cost of UTF-8 validation
355 //
356 // # Safety
357 // This is safe because we escaped all non-ASCII bytes.
358 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
359 let re = regex::bytes::RegexBuilder::new(&pattern_string)
360 .unicode(false)
361 .build()
362 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
363
364 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
336 365 }
337 366
338 367 /// Returns the regex pattern and a function that matches an `HgPath` against
General Comments 0
You need to be logged in to leave comments. Login now