##// END OF EJS Templates
rust-regex: fix issues with regex anchoring and performance...
Raphaël Gomès -
r45347:ad1ec409 default
parent child Browse files
Show More
@@ -176,9 +176,7 b' fn _build_single_regex(entry: &IgnorePat'
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 // The `regex` crate adds `.*` to the start and end of expressions
179 PatternSyntax::Regexp => pattern.to_owned(),
180 // if there are no anchors, so add them.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
182 PatternSyntax::RelRegexp => {
180 PatternSyntax::RelRegexp => {
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error all
182 // do not. Checking for `*` correctly triggers the same error all
@@ -196,15 +194,14 b' fn _build_single_regex(entry: &IgnorePat'
196 }
194 }
197 PatternSyntax::RootFiles => {
195 PatternSyntax::RootFiles => {
198 let mut res = if pattern == b"." {
196 let mut res = if pattern == b"." {
199 vec![b'^']
197 vec![]
200 } else {
198 } else {
201 // Pattern is a directory name.
199 // Pattern is a directory name.
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
200 [escape_pattern(pattern).as_slice(), b"/"].concat()
203 };
201 };
204
202
205 // Anything after the pattern must be a non-directory.
203 // Anything after the pattern must be a non-directory.
206 res.extend(b"[^/]+$");
204 res.extend(b"[^/]+$");
207 res.push(b'$');
208 res
205 res
209 }
206 }
210 PatternSyntax::RelGlob => {
207 PatternSyntax::RelGlob => {
@@ -216,7 +213,7 b' fn _build_single_regex(entry: &IgnorePat'
216 }
213 }
217 }
214 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
215 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
216 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 }
217 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
218 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 }
219 }
@@ -654,7 +651,7 b' mod tests {'
654 Path::new("")
651 Path::new("")
655 ))
652 ))
656 .unwrap(),
653 .unwrap(),
657 Some(br"^[^/]*\.o(?:/|$)".to_vec()),
654 Some(br"[^/]*\.o(?:/|$)".to_vec()),
658 );
655 );
659 }
656 }
660 }
657 }
@@ -347,7 +347,9 b' fn re_matcher('
347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
348 use std::io::Write;
348 use std::io::Write;
349
349
350 let mut escaped_bytes = vec![];
350 // The `regex` crate adds `.*` to the start and end of expressions if there
351 // are no anchors, so add the start anchor.
352 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
351 for byte in pattern {
353 for byte in pattern {
352 if *byte > 127 {
354 if *byte > 127 {
353 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
355 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
@@ -355,6 +357,7 b' fn re_matcher('
355 escaped_bytes.push(*byte);
357 escaped_bytes.push(*byte);
356 }
358 }
357 }
359 }
360 escaped_bytes.push(b')');
358
361
359 // Avoid the cost of UTF8 checking
362 // Avoid the cost of UTF8 checking
360 //
363 //
General Comments 0
You need to be logged in to leave comments. Log in now.