##// END OF EJS Templates
rust-regex: fix issues with regex anchoring and performance...
Raphaël Gomès -
r45347:ad1ec409 default
parent child Browse files
Show More
@@ -1,660 +1,657 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::fs::File;
20 use std::fs::File;
21 use std::io::Read;
21 use std::io::Read;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24 use std::vec::Vec;
24 use std::vec::Vec;
25
25
26 lazy_static! {
26 lazy_static! {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 for byte in to_escape {
30 for byte in to_escape {
31 v[*byte as usize].insert(0, b'\\');
31 v[*byte as usize].insert(0, b'\\');
32 }
32 }
33 v
33 v
34 };
34 };
35 }
35 }
36
36
37 /// These are matched in order
37 /// These are matched in order
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40
40
41 /// Appended to the regexp of globs
41 /// Appended to the regexp of globs
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43
43
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
45 pub enum PatternSyntax {
45 pub enum PatternSyntax {
46 /// A regular expression
46 /// A regular expression
47 Regexp,
47 Regexp,
48 /// Glob that matches at the front of the path
48 /// Glob that matches at the front of the path
49 RootGlob,
49 RootGlob,
50 /// Glob that matches at any suffix of the path (still anchored at
50 /// Glob that matches at any suffix of the path (still anchored at
51 /// slashes)
51 /// slashes)
52 Glob,
52 Glob,
53 /// a path relative to repository root, which is matched recursively
53 /// a path relative to repository root, which is matched recursively
54 Path,
54 Path,
55 /// A path relative to cwd
55 /// A path relative to cwd
56 RelPath,
56 RelPath,
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
58 RelGlob,
58 RelGlob,
59 /// A regexp that needn't match the start of a name
59 /// A regexp that needn't match the start of a name
60 RelRegexp,
60 RelRegexp,
61 /// A path relative to repository root, which is matched non-recursively
61 /// A path relative to repository root, which is matched non-recursively
62 /// (will not match subdirectories)
62 /// (will not match subdirectories)
63 RootFiles,
63 RootFiles,
64 /// A file of patterns to read and include
64 /// A file of patterns to read and include
65 Include,
65 Include,
66 /// A file of patterns to match against files under the same directory
66 /// A file of patterns to match against files under the same directory
67 SubInclude,
67 SubInclude,
68 }
68 }
69
69
70 /// Transforms a glob pattern into a regex
70 /// Transforms a glob pattern into a regex
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 let mut input = pat;
72 let mut input = pat;
73 let mut res: Vec<u8> = vec![];
73 let mut res: Vec<u8> = vec![];
74 let mut group_depth = 0;
74 let mut group_depth = 0;
75
75
76 while let Some((c, rest)) = input.split_first() {
76 while let Some((c, rest)) = input.split_first() {
77 input = rest;
77 input = rest;
78
78
79 match c {
79 match c {
80 b'*' => {
80 b'*' => {
81 for (source, repl) in GLOB_REPLACEMENTS {
81 for (source, repl) in GLOB_REPLACEMENTS {
82 if let Some(rest) = input.drop_prefix(source) {
82 if let Some(rest) = input.drop_prefix(source) {
83 input = rest;
83 input = rest;
84 res.extend(*repl);
84 res.extend(*repl);
85 break;
85 break;
86 }
86 }
87 }
87 }
88 }
88 }
89 b'?' => res.extend(b"."),
89 b'?' => res.extend(b"."),
90 b'[' => {
90 b'[' => {
91 match input.iter().skip(1).position(|b| *b == b']') {
91 match input.iter().skip(1).position(|b| *b == b']') {
92 None => res.extend(b"\\["),
92 None => res.extend(b"\\["),
93 Some(end) => {
93 Some(end) => {
94 // Account for the one we skipped
94 // Account for the one we skipped
95 let end = end + 1;
95 let end = end + 1;
96
96
97 res.extend(b"[");
97 res.extend(b"[");
98
98
99 for (i, b) in input[..end].iter().enumerate() {
99 for (i, b) in input[..end].iter().enumerate() {
100 if *b == b'!' && i == 0 {
100 if *b == b'!' && i == 0 {
101 res.extend(b"^")
101 res.extend(b"^")
102 } else if *b == b'^' && i == 0 {
102 } else if *b == b'^' && i == 0 {
103 res.extend(b"\\^")
103 res.extend(b"\\^")
104 } else if *b == b'\\' {
104 } else if *b == b'\\' {
105 res.extend(b"\\\\")
105 res.extend(b"\\\\")
106 } else {
106 } else {
107 res.push(*b)
107 res.push(*b)
108 }
108 }
109 }
109 }
110 res.extend(b"]");
110 res.extend(b"]");
111 input = &input[end + 1..];
111 input = &input[end + 1..];
112 }
112 }
113 }
113 }
114 }
114 }
115 b'{' => {
115 b'{' => {
116 group_depth += 1;
116 group_depth += 1;
117 res.extend(b"(?:")
117 res.extend(b"(?:")
118 }
118 }
119 b'}' if group_depth > 0 => {
119 b'}' if group_depth > 0 => {
120 group_depth -= 1;
120 group_depth -= 1;
121 res.extend(b")");
121 res.extend(b")");
122 }
122 }
123 b',' if group_depth > 0 => res.extend(b"|"),
123 b',' if group_depth > 0 => res.extend(b"|"),
124 b'\\' => {
124 b'\\' => {
125 let c = {
125 let c = {
126 if let Some((c, rest)) = input.split_first() {
126 if let Some((c, rest)) = input.split_first() {
127 input = rest;
127 input = rest;
128 c
128 c
129 } else {
129 } else {
130 c
130 c
131 }
131 }
132 };
132 };
133 res.extend(&RE_ESCAPE[*c as usize])
133 res.extend(&RE_ESCAPE[*c as usize])
134 }
134 }
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 }
136 }
137 }
137 }
138 res
138 res
139 }
139 }
140
140
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 pattern
142 pattern
143 .iter()
143 .iter()
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 .collect()
145 .collect()
146 }
146 }
147
147
148 pub fn parse_pattern_syntax(
148 pub fn parse_pattern_syntax(
149 kind: &[u8],
149 kind: &[u8],
150 ) -> Result<PatternSyntax, PatternError> {
150 ) -> Result<PatternSyntax, PatternError> {
151 match kind {
151 match kind {
152 b"re:" => Ok(PatternSyntax::Regexp),
152 b"re:" => Ok(PatternSyntax::Regexp),
153 b"path:" => Ok(PatternSyntax::Path),
153 b"path:" => Ok(PatternSyntax::Path),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 b"glob:" => Ok(PatternSyntax::Glob),
158 b"glob:" => Ok(PatternSyntax::Glob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 b"include:" => Ok(PatternSyntax::Include),
160 b"include:" => Ok(PatternSyntax::Include),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 _ => Err(PatternError::UnsupportedSyntax(
162 _ => Err(PatternError::UnsupportedSyntax(
163 String::from_utf8_lossy(kind).to_string(),
163 String::from_utf8_lossy(kind).to_string(),
164 )),
164 )),
165 }
165 }
166 }
166 }
167
167
168 /// Builds the regex that corresponds to the given pattern.
168 /// Builds the regex that corresponds to the given pattern.
169 /// If within a `syntax: regexp` context, returns the pattern,
169 /// If within a `syntax: regexp` context, returns the pattern,
170 /// otherwise, returns the corresponding regex.
170 /// otherwise, returns the corresponding regex.
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 let IgnorePattern {
172 let IgnorePattern {
173 syntax, pattern, ..
173 syntax, pattern, ..
174 } = entry;
174 } = entry;
175 if pattern.is_empty() {
175 if pattern.is_empty() {
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 // The `regex` crate adds `.*` to the start and end of expressions
179 PatternSyntax::Regexp => pattern.to_owned(),
180 // if there are no anchors, so add them.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
182 PatternSyntax::RelRegexp => {
180 PatternSyntax::RelRegexp => {
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error all
182 // do not. Checking for `*` correctly triggers the same error all
185 // engines.
183 // engines.
186 if pattern[0] == b'^' || pattern[0] == b'*' {
184 if pattern[0] == b'^' || pattern[0] == b'*' {
187 return pattern.to_owned();
185 return pattern.to_owned();
188 }
186 }
189 [&b".*"[..], pattern].concat()
187 [&b".*"[..], pattern].concat()
190 }
188 }
191 PatternSyntax::Path | PatternSyntax::RelPath => {
189 PatternSyntax::Path | PatternSyntax::RelPath => {
192 if pattern == b"." {
190 if pattern == b"." {
193 return vec![];
191 return vec![];
194 }
192 }
195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
193 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 }
194 }
197 PatternSyntax::RootFiles => {
195 PatternSyntax::RootFiles => {
198 let mut res = if pattern == b"." {
196 let mut res = if pattern == b"." {
199 vec![b'^']
197 vec![]
200 } else {
198 } else {
201 // Pattern is a directory name.
199 // Pattern is a directory name.
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
200 [escape_pattern(pattern).as_slice(), b"/"].concat()
203 };
201 };
204
202
205 // Anything after the pattern must be a non-directory.
203 // Anything after the pattern must be a non-directory.
206 res.extend(b"[^/]+$");
204 res.extend(b"[^/]+$");
207 res.push(b'$');
208 res
205 res
209 }
206 }
210 PatternSyntax::RelGlob => {
207 PatternSyntax::RelGlob => {
211 let glob_re = glob_to_re(pattern);
208 let glob_re = glob_to_re(pattern);
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
209 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
213 [b".*", rest, GLOB_SUFFIX].concat()
210 [b".*", rest, GLOB_SUFFIX].concat()
214 } else {
211 } else {
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
212 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 }
213 }
217 }
214 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
215 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
216 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 }
217 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
218 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 }
219 }
223 }
220 }
224
221
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
222 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
223 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227
224
228 /// TODO support other platforms
225 /// TODO support other platforms
229 #[cfg(unix)]
226 #[cfg(unix)]
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
227 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
231 if bytes.is_empty() {
228 if bytes.is_empty() {
232 return b".".to_vec();
229 return b".".to_vec();
233 }
230 }
234 let sep = b'/';
231 let sep = b'/';
235
232
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
233 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
237 if initial_slashes > 2 {
234 if initial_slashes > 2 {
238 // POSIX allows one or two initial slashes, but treats three or more
235 // POSIX allows one or two initial slashes, but treats three or more
239 // as single slash.
236 // as single slash.
240 initial_slashes = 1;
237 initial_slashes = 1;
241 }
238 }
242 let components = bytes
239 let components = bytes
243 .split(|b| *b == sep)
240 .split(|b| *b == sep)
244 .filter(|c| !(c.is_empty() || c == b"."))
241 .filter(|c| !(c.is_empty() || c == b"."))
245 .fold(vec![], |mut acc, component| {
242 .fold(vec![], |mut acc, component| {
246 if component != b".."
243 if component != b".."
247 || (initial_slashes == 0 && acc.is_empty())
244 || (initial_slashes == 0 && acc.is_empty())
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
245 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
249 {
246 {
250 acc.push(component)
247 acc.push(component)
251 } else if !acc.is_empty() {
248 } else if !acc.is_empty() {
252 acc.pop();
249 acc.pop();
253 }
250 }
254 acc
251 acc
255 });
252 });
256 let mut new_bytes = components.join(&sep);
253 let mut new_bytes = components.join(&sep);
257
254
258 if initial_slashes > 0 {
255 if initial_slashes > 0 {
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
256 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
260 buf.extend(new_bytes);
257 buf.extend(new_bytes);
261 new_bytes = buf;
258 new_bytes = buf;
262 }
259 }
263 if new_bytes.is_empty() {
260 if new_bytes.is_empty() {
264 b".".to_vec()
261 b".".to_vec()
265 } else {
262 } else {
266 new_bytes
263 new_bytes
267 }
264 }
268 }
265 }
269
266
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
267 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 /// that don't need to be transformed into a regex.
268 /// that don't need to be transformed into a regex.
272 pub fn build_single_regex(
269 pub fn build_single_regex(
273 entry: &IgnorePattern,
270 entry: &IgnorePattern,
274 ) -> Result<Option<Vec<u8>>, PatternError> {
271 ) -> Result<Option<Vec<u8>>, PatternError> {
275 let IgnorePattern {
272 let IgnorePattern {
276 pattern, syntax, ..
273 pattern, syntax, ..
277 } = entry;
274 } = entry;
278 let pattern = match syntax {
275 let pattern = match syntax {
279 PatternSyntax::RootGlob
276 PatternSyntax::RootGlob
280 | PatternSyntax::Path
277 | PatternSyntax::Path
281 | PatternSyntax::RelGlob
278 | PatternSyntax::RelGlob
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
279 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
280 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 return Err(PatternError::NonRegexPattern(entry.clone()))
281 return Err(PatternError::NonRegexPattern(entry.clone()))
285 }
282 }
286 _ => pattern.to_owned(),
283 _ => pattern.to_owned(),
287 };
284 };
288 if *syntax == PatternSyntax::RootGlob
285 if *syntax == PatternSyntax::RootGlob
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
286 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 {
287 {
291 Ok(None)
288 Ok(None)
292 } else {
289 } else {
293 let mut entry = entry.clone();
290 let mut entry = entry.clone();
294 entry.pattern = pattern;
291 entry.pattern = pattern;
295 Ok(Some(_build_single_regex(&entry)))
292 Ok(Some(_build_single_regex(&entry)))
296 }
293 }
297 }
294 }
298
295
299 lazy_static! {
296 lazy_static! {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
297 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
301 let mut m = FastHashMap::default();
298 let mut m = FastHashMap::default();
302
299
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
300 m.insert(b"re".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
301 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
302 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
303 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
304 m.insert(b"include".as_ref(), b"include:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
305 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
309 m
306 m
310 };
307 };
311 }
308 }
312
309
313 #[derive(Debug)]
310 #[derive(Debug)]
314 pub enum PatternFileWarning {
311 pub enum PatternFileWarning {
315 /// (file path, syntax bytes)
312 /// (file path, syntax bytes)
316 InvalidSyntax(PathBuf, Vec<u8>),
313 InvalidSyntax(PathBuf, Vec<u8>),
317 /// File path
314 /// File path
318 NoSuchFile(PathBuf),
315 NoSuchFile(PathBuf),
319 }
316 }
320
317
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
318 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
322 lines: &[u8],
319 lines: &[u8],
323 file_path: P,
320 file_path: P,
324 warn: bool,
321 warn: bool,
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
322 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
323 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
324 let comment_escape_regex = Regex::new(r"\\#").unwrap();
328 let mut inputs: Vec<IgnorePattern> = vec![];
325 let mut inputs: Vec<IgnorePattern> = vec![];
329 let mut warnings: Vec<PatternFileWarning> = vec![];
326 let mut warnings: Vec<PatternFileWarning> = vec![];
330
327
331 let mut current_syntax = b"relre:".as_ref();
328 let mut current_syntax = b"relre:".as_ref();
332
329
333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
330 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
334 let line_number = line_number + 1;
331 let line_number = line_number + 1;
335
332
336 let line_buf;
333 let line_buf;
337 if line.contains(&b'#') {
334 if line.contains(&b'#') {
338 if let Some(cap) = comment_regex.captures(line) {
335 if let Some(cap) = comment_regex.captures(line) {
339 line = &line[..cap.get(1).unwrap().end()]
336 line = &line[..cap.get(1).unwrap().end()]
340 }
337 }
341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
338 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
342 line = &line_buf;
339 line = &line_buf;
343 }
340 }
344
341
345 let mut line = line.trim_end();
342 let mut line = line.trim_end();
346
343
347 if line.is_empty() {
344 if line.is_empty() {
348 continue;
345 continue;
349 }
346 }
350
347
351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
348 if let Some(syntax) = line.drop_prefix(b"syntax:") {
352 let syntax = syntax.trim();
349 let syntax = syntax.trim();
353
350
354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
351 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
355 current_syntax = rel_syntax;
352 current_syntax = rel_syntax;
356 } else if warn {
353 } else if warn {
357 warnings.push(PatternFileWarning::InvalidSyntax(
354 warnings.push(PatternFileWarning::InvalidSyntax(
358 file_path.as_ref().to_owned(),
355 file_path.as_ref().to_owned(),
359 syntax.to_owned(),
356 syntax.to_owned(),
360 ));
357 ));
361 }
358 }
362 continue;
359 continue;
363 }
360 }
364
361
365 let mut line_syntax: &[u8] = &current_syntax;
362 let mut line_syntax: &[u8] = &current_syntax;
366
363
367 for (s, rels) in SYNTAXES.iter() {
364 for (s, rels) in SYNTAXES.iter() {
368 if let Some(rest) = line.drop_prefix(rels) {
365 if let Some(rest) = line.drop_prefix(rels) {
369 line_syntax = rels;
366 line_syntax = rels;
370 line = rest;
367 line = rest;
371 break;
368 break;
372 }
369 }
373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
370 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
374 line_syntax = rels;
371 line_syntax = rels;
375 line = rest;
372 line = rest;
376 break;
373 break;
377 }
374 }
378 }
375 }
379
376
380 inputs.push(IgnorePattern::new(
377 inputs.push(IgnorePattern::new(
381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
378 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
382 PatternError::UnsupportedSyntax(syntax) => {
379 PatternError::UnsupportedSyntax(syntax) => {
383 PatternError::UnsupportedSyntaxInFile(
380 PatternError::UnsupportedSyntaxInFile(
384 syntax,
381 syntax,
385 file_path.as_ref().to_string_lossy().into(),
382 file_path.as_ref().to_string_lossy().into(),
386 line_number,
383 line_number,
387 )
384 )
388 }
385 }
389 _ => e,
386 _ => e,
390 })?,
387 })?,
391 &line,
388 &line,
392 &file_path,
389 &file_path,
393 ));
390 ));
394 }
391 }
395 Ok((inputs, warnings))
392 Ok((inputs, warnings))
396 }
393 }
397
394
398 pub fn read_pattern_file<P: AsRef<Path>>(
395 pub fn read_pattern_file<P: AsRef<Path>>(
399 file_path: P,
396 file_path: P,
400 warn: bool,
397 warn: bool,
401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
398 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
402 let mut f = match File::open(file_path.as_ref()) {
399 let mut f = match File::open(file_path.as_ref()) {
403 Ok(f) => Ok(f),
400 Ok(f) => Ok(f),
404 Err(e) => match e.kind() {
401 Err(e) => match e.kind() {
405 std::io::ErrorKind::NotFound => {
402 std::io::ErrorKind::NotFound => {
406 return Ok((
403 return Ok((
407 vec![],
404 vec![],
408 vec![PatternFileWarning::NoSuchFile(
405 vec![PatternFileWarning::NoSuchFile(
409 file_path.as_ref().to_owned(),
406 file_path.as_ref().to_owned(),
410 )],
407 )],
411 ))
408 ))
412 }
409 }
413 _ => Err(e),
410 _ => Err(e),
414 },
411 },
415 }?;
412 }?;
416 let mut contents = Vec::new();
413 let mut contents = Vec::new();
417
414
418 f.read_to_end(&mut contents)?;
415 f.read_to_end(&mut contents)?;
419
416
420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
417 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
421 }
418 }
422
419
423 /// Represents an entry in an "ignore" file.
420 /// Represents an entry in an "ignore" file.
424 #[derive(Debug, Eq, PartialEq, Clone)]
421 #[derive(Debug, Eq, PartialEq, Clone)]
425 pub struct IgnorePattern {
422 pub struct IgnorePattern {
426 pub syntax: PatternSyntax,
423 pub syntax: PatternSyntax,
427 pub pattern: Vec<u8>,
424 pub pattern: Vec<u8>,
428 pub source: PathBuf,
425 pub source: PathBuf,
429 }
426 }
430
427
431 impl IgnorePattern {
428 impl IgnorePattern {
432 pub fn new(
429 pub fn new(
433 syntax: PatternSyntax,
430 syntax: PatternSyntax,
434 pattern: &[u8],
431 pattern: &[u8],
435 source: impl AsRef<Path>,
432 source: impl AsRef<Path>,
436 ) -> Self {
433 ) -> Self {
437 Self {
434 Self {
438 syntax,
435 syntax,
439 pattern: pattern.to_owned(),
436 pattern: pattern.to_owned(),
440 source: source.as_ref().to_owned(),
437 source: source.as_ref().to_owned(),
441 }
438 }
442 }
439 }
443 }
440 }
444
441
445 pub type PatternResult<T> = Result<T, PatternError>;
442 pub type PatternResult<T> = Result<T, PatternError>;
446
443
447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
444 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
448 /// patterns.
445 /// patterns.
449 ///
446 ///
450 /// `subinclude:` is not treated as a special pattern here: unraveling them
447 /// `subinclude:` is not treated as a special pattern here: unraveling them
451 /// needs to occur in the "ignore" phase.
448 /// needs to occur in the "ignore" phase.
452 pub fn get_patterns_from_file(
449 pub fn get_patterns_from_file(
453 pattern_file: impl AsRef<Path>,
450 pattern_file: impl AsRef<Path>,
454 root_dir: impl AsRef<Path>,
451 root_dir: impl AsRef<Path>,
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
452 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
453 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
457 let patterns = patterns
454 let patterns = patterns
458 .into_iter()
455 .into_iter()
459 .flat_map(|entry| -> PatternResult<_> {
456 .flat_map(|entry| -> PatternResult<_> {
460 let IgnorePattern {
457 let IgnorePattern {
461 syntax,
458 syntax,
462 pattern,
459 pattern,
463 source: _,
460 source: _,
464 } = &entry;
461 } = &entry;
465 Ok(match syntax {
462 Ok(match syntax {
466 PatternSyntax::Include => {
463 PatternSyntax::Include => {
467 let inner_include =
464 let inner_include =
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
465 root_dir.as_ref().join(get_path_from_bytes(&pattern));
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
466 let (inner_pats, inner_warnings) = get_patterns_from_file(
470 &inner_include,
467 &inner_include,
471 root_dir.as_ref(),
468 root_dir.as_ref(),
472 )?;
469 )?;
473 warnings.extend(inner_warnings);
470 warnings.extend(inner_warnings);
474 inner_pats
471 inner_pats
475 }
472 }
476 _ => vec![entry],
473 _ => vec![entry],
477 })
474 })
478 })
475 })
479 .flatten()
476 .flatten()
480 .collect();
477 .collect();
481
478
482 Ok((patterns, warnings))
479 Ok((patterns, warnings))
483 }
480 }
484
481
485 /// Holds all the information needed to handle a `subinclude:` pattern.
482 /// Holds all the information needed to handle a `subinclude:` pattern.
486 pub struct SubInclude {
483 pub struct SubInclude {
487 /// Will be used for repository (hg) paths that start with this prefix.
484 /// Will be used for repository (hg) paths that start with this prefix.
488 /// It is relative to the current working directory, so comparing against
485 /// It is relative to the current working directory, so comparing against
489 /// repository paths is painless.
486 /// repository paths is painless.
490 pub prefix: HgPathBuf,
487 pub prefix: HgPathBuf,
491 /// The file itself, containing the patterns
488 /// The file itself, containing the patterns
492 pub path: PathBuf,
489 pub path: PathBuf,
493 /// Folder in the filesystem where this it applies
490 /// Folder in the filesystem where this it applies
494 pub root: PathBuf,
491 pub root: PathBuf,
495 }
492 }
496
493
497 impl SubInclude {
494 impl SubInclude {
498 pub fn new(
495 pub fn new(
499 root_dir: impl AsRef<Path>,
496 root_dir: impl AsRef<Path>,
500 pattern: &[u8],
497 pattern: &[u8],
501 source: impl AsRef<Path>,
498 source: impl AsRef<Path>,
502 ) -> Result<SubInclude, HgPathError> {
499 ) -> Result<SubInclude, HgPathError> {
503 let normalized_source =
500 let normalized_source =
504 normalize_path_bytes(&get_bytes_from_path(source));
501 normalize_path_bytes(&get_bytes_from_path(source));
505
502
506 let source_root = get_path_from_bytes(&normalized_source);
503 let source_root = get_path_from_bytes(&normalized_source);
507 let source_root = source_root.parent().unwrap_or(source_root.deref());
504 let source_root = source_root.parent().unwrap_or(source_root.deref());
508
505
509 let path = source_root.join(get_path_from_bytes(pattern));
506 let path = source_root.join(get_path_from_bytes(pattern));
510 let new_root = path.parent().unwrap_or(path.deref());
507 let new_root = path.parent().unwrap_or(path.deref());
511
508
512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
509 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
513
510
514 Ok(Self {
511 Ok(Self {
515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
512 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
516 if !p.is_empty() {
513 if !p.is_empty() {
517 p.push(b'/');
514 p.push(b'/');
518 }
515 }
519 Ok(p)
516 Ok(p)
520 })?,
517 })?,
521 path: path.to_owned(),
518 path: path.to_owned(),
522 root: new_root.to_owned(),
519 root: new_root.to_owned(),
523 })
520 })
524 }
521 }
525 }
522 }
526
523
527 /// Separate and pre-process subincludes from other patterns for the "ignore"
524 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 /// phase.
525 /// phase.
529 pub fn filter_subincludes(
526 pub fn filter_subincludes(
530 ignore_patterns: &[IgnorePattern],
527 ignore_patterns: &[IgnorePattern],
531 root_dir: impl AsRef<Path>,
528 root_dir: impl AsRef<Path>,
532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
529 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 let mut subincludes = vec![];
530 let mut subincludes = vec![];
534 let mut others = vec![];
531 let mut others = vec![];
535
532
536 for ignore_pattern in ignore_patterns.iter() {
533 for ignore_pattern in ignore_patterns.iter() {
537 let IgnorePattern {
534 let IgnorePattern {
538 syntax,
535 syntax,
539 pattern,
536 pattern,
540 source,
537 source,
541 } = ignore_pattern;
538 } = ignore_pattern;
542 if *syntax == PatternSyntax::SubInclude {
539 if *syntax == PatternSyntax::SubInclude {
543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
540 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 } else {
541 } else {
545 others.push(ignore_pattern)
542 others.push(ignore_pattern)
546 }
543 }
547 }
544 }
548 Ok((subincludes, others))
545 Ok((subincludes, others))
549 }
546 }
550
547
551 #[cfg(test)]
548 #[cfg(test)]
552 mod tests {
549 mod tests {
553 use super::*;
550 use super::*;
554 use pretty_assertions::assert_eq;
551 use pretty_assertions::assert_eq;
555
552
556 #[test]
553 #[test]
557 fn escape_pattern_test() {
554 fn escape_pattern_test() {
558 let untouched =
555 let untouched =
559 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
556 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
560 assert_eq!(escape_pattern(untouched), untouched.to_vec());
557 assert_eq!(escape_pattern(untouched), untouched.to_vec());
561 // All escape codes
558 // All escape codes
562 assert_eq!(
559 assert_eq!(
563 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
560 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
564 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
561 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
565 .to_vec()
562 .to_vec()
566 );
563 );
567 }
564 }
568
565
569 #[test]
566 #[test]
570 fn glob_test() {
567 fn glob_test() {
571 assert_eq!(glob_to_re(br#"?"#), br#"."#);
568 assert_eq!(glob_to_re(br#"?"#), br#"."#);
572 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
569 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
573 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
570 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
574 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
571 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
575 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
572 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
576 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
573 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
577 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
574 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
578 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
575 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
579 }
576 }
580
577
581 #[test]
578 #[test]
582 fn test_parse_pattern_file_contents() {
579 fn test_parse_pattern_file_contents() {
583 let lines = b"syntax: glob\n*.elc";
580 let lines = b"syntax: glob\n*.elc";
584
581
585 assert_eq!(
582 assert_eq!(
586 parse_pattern_file_contents(lines, Path::new("file_path"), false)
583 parse_pattern_file_contents(lines, Path::new("file_path"), false)
587 .unwrap()
584 .unwrap()
588 .0,
585 .0,
589 vec![IgnorePattern::new(
586 vec![IgnorePattern::new(
590 PatternSyntax::RelGlob,
587 PatternSyntax::RelGlob,
591 b"*.elc",
588 b"*.elc",
592 Path::new("file_path")
589 Path::new("file_path")
593 )],
590 )],
594 );
591 );
595
592
596 let lines = b"syntax: include\nsyntax: glob";
593 let lines = b"syntax: include\nsyntax: glob";
597
594
598 assert_eq!(
595 assert_eq!(
599 parse_pattern_file_contents(lines, Path::new("file_path"), false)
596 parse_pattern_file_contents(lines, Path::new("file_path"), false)
600 .unwrap()
597 .unwrap()
601 .0,
598 .0,
602 vec![]
599 vec![]
603 );
600 );
604 let lines = b"glob:**.o";
601 let lines = b"glob:**.o";
605 assert_eq!(
602 assert_eq!(
606 parse_pattern_file_contents(lines, Path::new("file_path"), false)
603 parse_pattern_file_contents(lines, Path::new("file_path"), false)
607 .unwrap()
604 .unwrap()
608 .0,
605 .0,
609 vec![IgnorePattern::new(
606 vec![IgnorePattern::new(
610 PatternSyntax::RelGlob,
607 PatternSyntax::RelGlob,
611 b"**.o",
608 b"**.o",
612 Path::new("file_path")
609 Path::new("file_path")
613 )]
610 )]
614 );
611 );
615 }
612 }
616
613
617 #[test]
614 #[test]
618 fn test_build_single_regex() {
615 fn test_build_single_regex() {
619 assert_eq!(
616 assert_eq!(
620 build_single_regex(&IgnorePattern::new(
617 build_single_regex(&IgnorePattern::new(
621 PatternSyntax::RelGlob,
618 PatternSyntax::RelGlob,
622 b"rust/target/",
619 b"rust/target/",
623 Path::new("")
620 Path::new("")
624 ))
621 ))
625 .unwrap(),
622 .unwrap(),
626 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
623 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
627 );
624 );
628 }
625 }
629
626
630 #[test]
627 #[test]
631 fn test_build_single_regex_shortcut() {
628 fn test_build_single_regex_shortcut() {
632 assert_eq!(
629 assert_eq!(
633 build_single_regex(&IgnorePattern::new(
630 build_single_regex(&IgnorePattern::new(
634 PatternSyntax::RootGlob,
631 PatternSyntax::RootGlob,
635 b"",
632 b"",
636 Path::new("")
633 Path::new("")
637 ))
634 ))
638 .unwrap(),
635 .unwrap(),
639 None,
636 None,
640 );
637 );
641 assert_eq!(
638 assert_eq!(
642 build_single_regex(&IgnorePattern::new(
639 build_single_regex(&IgnorePattern::new(
643 PatternSyntax::RootGlob,
640 PatternSyntax::RootGlob,
644 b"whatever",
641 b"whatever",
645 Path::new("")
642 Path::new("")
646 ))
643 ))
647 .unwrap(),
644 .unwrap(),
648 None,
645 None,
649 );
646 );
650 assert_eq!(
647 assert_eq!(
651 build_single_regex(&IgnorePattern::new(
648 build_single_regex(&IgnorePattern::new(
652 PatternSyntax::RootGlob,
649 PatternSyntax::RootGlob,
653 b"*.o",
650 b"*.o",
654 Path::new("")
651 Path::new("")
655 ))
652 ))
656 .unwrap(),
653 .unwrap(),
657 Some(br"^[^/]*\.o(?:/|$)".to_vec()),
654 Some(br"[^/]*\.o(?:/|$)".to_vec()),
658 );
655 );
659 }
656 }
660 }
657 }
@@ -1,954 +1,957 b''
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 #[cfg(feature = "with-re2")]
10 #[cfg(feature = "with-re2")]
11 use crate::re2::Re2;
11 use crate::re2::Re2;
12 use crate::{
12 use crate::{
13 dirstate::dirs_multiset::DirsChildrenMultiset,
13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 filepatterns::{
14 filepatterns::{
15 build_single_regex, filter_subincludes, get_patterns_from_file,
15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 PatternFileWarning, PatternResult, SubInclude,
16 PatternFileWarning, PatternResult, SubInclude,
17 },
17 },
18 utils::{
18 utils::{
19 files::find_dirs,
19 files::find_dirs,
20 hg_path::{HgPath, HgPathBuf},
20 hg_path::{HgPath, HgPathBuf},
21 Escaped,
21 Escaped,
22 },
22 },
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 PatternSyntax,
24 PatternSyntax,
25 };
25 };
26
26
27 use crate::filepatterns::normalize_path_bytes;
27 use crate::filepatterns::normalize_path_bytes;
28 use std::borrow::ToOwned;
28 use std::borrow::ToOwned;
29 use std::collections::HashSet;
29 use std::collections::HashSet;
30 use std::fmt::{Display, Error, Formatter};
30 use std::fmt::{Display, Error, Formatter};
31 use std::iter::FromIterator;
31 use std::iter::FromIterator;
32 use std::ops::Deref;
32 use std::ops::Deref;
33 use std::path::{Path, PathBuf};
33 use std::path::{Path, PathBuf};
34
34
35 use micro_timer::timed;
35 use micro_timer::timed;
36
36
37 #[derive(Debug, PartialEq)]
37 #[derive(Debug, PartialEq)]
38 pub enum VisitChildrenSet<'a> {
38 pub enum VisitChildrenSet<'a> {
39 /// Don't visit anything
39 /// Don't visit anything
40 Empty,
40 Empty,
41 /// Only visit this directory
41 /// Only visit this directory
42 This,
42 This,
43 /// Visit this directory and these subdirectories
43 /// Visit this directory and these subdirectories
44 /// TODO Should we implement a `NonEmptyHashSet`?
44 /// TODO Should we implement a `NonEmptyHashSet`?
45 Set(HashSet<&'a HgPath>),
45 Set(HashSet<&'a HgPath>),
46 /// Visit this directory and all subdirectories
46 /// Visit this directory and all subdirectories
47 Recursive,
47 Recursive,
48 }
48 }
49
49
50 pub trait Matcher {
50 pub trait Matcher {
51 /// Explicitly listed files
51 /// Explicitly listed files
52 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
52 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
53 /// Returns whether `filename` is in `file_set`
53 /// Returns whether `filename` is in `file_set`
54 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
54 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
55 /// Returns whether `filename` is matched by this matcher
55 /// Returns whether `filename` is matched by this matcher
56 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
56 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
57 /// Decides whether a directory should be visited based on whether it
57 /// Decides whether a directory should be visited based on whether it
58 /// has potential matches in it or one of its subdirectories, and
58 /// has potential matches in it or one of its subdirectories, and
59 /// potentially lists which subdirectories of that directory should be
59 /// potentially lists which subdirectories of that directory should be
60 /// visited. This is based on the match's primary, included, and excluded
60 /// visited. This is based on the match's primary, included, and excluded
61 /// patterns.
61 /// patterns.
62 ///
62 ///
63 /// # Example
63 /// # Example
64 ///
64 ///
65 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
65 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
66 /// return the following values (assuming the implementation of
66 /// return the following values (assuming the implementation of
67 /// visit_children_set is capable of recognizing this; some implementations
67 /// visit_children_set is capable of recognizing this; some implementations
68 /// are not).
68 /// are not).
69 ///
69 ///
70 /// ```text
70 /// ```text
71 /// ```ignore
71 /// ```ignore
72 /// '' -> {'foo', 'qux'}
72 /// '' -> {'foo', 'qux'}
73 /// 'baz' -> set()
73 /// 'baz' -> set()
74 /// 'foo' -> {'bar'}
74 /// 'foo' -> {'bar'}
75 /// // Ideally this would be `Recursive`, but since the prefix nature of
75 /// // Ideally this would be `Recursive`, but since the prefix nature of
76 /// // matchers is applied to the entire matcher, we have to downgrade this
76 /// // matchers is applied to the entire matcher, we have to downgrade this
77 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
77 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
78 /// // `RootFilesIn'-kind matcher being mixed in.
78 /// // `RootFilesIn'-kind matcher being mixed in.
79 /// 'foo/bar' -> 'this'
79 /// 'foo/bar' -> 'this'
80 /// 'qux' -> 'this'
80 /// 'qux' -> 'this'
81 /// ```
81 /// ```
82 /// # Important
82 /// # Important
83 ///
83 ///
84 /// Most matchers do not know if they're representing files or
84 /// Most matchers do not know if they're representing files or
85 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
85 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
86 /// file or a directory, so `visit_children_set('dir')` for most matchers
86 /// file or a directory, so `visit_children_set('dir')` for most matchers
87 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
87 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
88 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
88 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
89 /// it may return `VisitChildrenSet::This`.
89 /// it may return `VisitChildrenSet::This`.
90 /// Do not rely on the return being a `HashSet` indicating that there are
90 /// Do not rely on the return being a `HashSet` indicating that there are
91 /// no files in this dir to investigate (or equivalently that if there are
91 /// no files in this dir to investigate (or equivalently that if there are
92 /// files to investigate in 'dir' that it will always return
92 /// files to investigate in 'dir' that it will always return
93 /// `VisitChildrenSet::This`).
93 /// `VisitChildrenSet::This`).
94 fn visit_children_set(
94 fn visit_children_set(
95 &self,
95 &self,
96 directory: impl AsRef<HgPath>,
96 directory: impl AsRef<HgPath>,
97 ) -> VisitChildrenSet;
97 ) -> VisitChildrenSet;
98 /// Matcher will match everything and `files_set()` will be empty:
98 /// Matcher will match everything and `files_set()` will be empty:
99 /// optimization might be possible.
99 /// optimization might be possible.
100 fn matches_everything(&self) -> bool;
100 fn matches_everything(&self) -> bool;
101 /// Matcher will match exactly the files in `files_set()`: optimization
101 /// Matcher will match exactly the files in `files_set()`: optimization
102 /// might be possible.
102 /// might be possible.
103 fn is_exact(&self) -> bool;
103 fn is_exact(&self) -> bool;
104 }
104 }
105
105
106 /// Matches everything.
106 /// Matches everything.
107 ///```
107 ///```
108 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
108 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
109 ///
109 ///
110 /// let matcher = AlwaysMatcher;
110 /// let matcher = AlwaysMatcher;
111 ///
111 ///
112 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
114 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
114 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
115 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
115 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
116 /// ```
116 /// ```
117 #[derive(Debug)]
117 #[derive(Debug)]
118 pub struct AlwaysMatcher;
118 pub struct AlwaysMatcher;
119
119
120 impl Matcher for AlwaysMatcher {
120 impl Matcher for AlwaysMatcher {
121 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
121 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
122 None
122 None
123 }
123 }
124 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
124 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
125 false
125 false
126 }
126 }
127 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
127 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
128 true
128 true
129 }
129 }
130 fn visit_children_set(
130 fn visit_children_set(
131 &self,
131 &self,
132 _directory: impl AsRef<HgPath>,
132 _directory: impl AsRef<HgPath>,
133 ) -> VisitChildrenSet {
133 ) -> VisitChildrenSet {
134 VisitChildrenSet::Recursive
134 VisitChildrenSet::Recursive
135 }
135 }
136 fn matches_everything(&self) -> bool {
136 fn matches_everything(&self) -> bool {
137 true
137 true
138 }
138 }
139 fn is_exact(&self) -> bool {
139 fn is_exact(&self) -> bool {
140 false
140 false
141 }
141 }
142 }
142 }
143
143
144 /// Matches the input files exactly. They are interpreted as paths, not
144 /// Matches the input files exactly. They are interpreted as paths, not
145 /// patterns.
145 /// patterns.
146 ///
146 ///
147 ///```
147 ///```
148 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
148 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
149 ///
149 ///
150 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
150 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
151 /// let matcher = FileMatcher::new(&files).unwrap();
151 /// let matcher = FileMatcher::new(&files).unwrap();
152 ///
152 ///
153 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
153 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
154 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
154 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
155 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
155 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
156 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
156 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
157 /// ```
157 /// ```
158 #[derive(Debug)]
158 #[derive(Debug)]
159 pub struct FileMatcher<'a> {
159 pub struct FileMatcher<'a> {
160 files: HashSet<&'a HgPath>,
160 files: HashSet<&'a HgPath>,
161 dirs: DirsMultiset,
161 dirs: DirsMultiset,
162 }
162 }
163
163
164 impl<'a> FileMatcher<'a> {
164 impl<'a> FileMatcher<'a> {
165 pub fn new(
165 pub fn new(
166 files: &'a [impl AsRef<HgPath>],
166 files: &'a [impl AsRef<HgPath>],
167 ) -> Result<Self, DirstateMapError> {
167 ) -> Result<Self, DirstateMapError> {
168 Ok(Self {
168 Ok(Self {
169 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
169 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
170 dirs: DirsMultiset::from_manifest(files)?,
170 dirs: DirsMultiset::from_manifest(files)?,
171 })
171 })
172 }
172 }
173 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
173 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
174 self.files.contains(filename.as_ref())
174 self.files.contains(filename.as_ref())
175 }
175 }
176 }
176 }
177
177
178 impl<'a> Matcher for FileMatcher<'a> {
178 impl<'a> Matcher for FileMatcher<'a> {
179 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
179 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
180 Some(&self.files)
180 Some(&self.files)
181 }
181 }
182 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
182 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
183 self.inner_matches(filename)
183 self.inner_matches(filename)
184 }
184 }
185 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
185 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
186 self.inner_matches(filename)
186 self.inner_matches(filename)
187 }
187 }
188 fn visit_children_set(
188 fn visit_children_set(
189 &self,
189 &self,
190 directory: impl AsRef<HgPath>,
190 directory: impl AsRef<HgPath>,
191 ) -> VisitChildrenSet {
191 ) -> VisitChildrenSet {
192 if self.files.is_empty() || !self.dirs.contains(&directory) {
192 if self.files.is_empty() || !self.dirs.contains(&directory) {
193 return VisitChildrenSet::Empty;
193 return VisitChildrenSet::Empty;
194 }
194 }
195 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
195 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
196
196
197 let mut candidates: HashSet<&HgPath> =
197 let mut candidates: HashSet<&HgPath> =
198 self.files.union(&dirs_as_set).map(|k| *k).collect();
198 self.files.union(&dirs_as_set).map(|k| *k).collect();
199 candidates.remove(HgPath::new(b""));
199 candidates.remove(HgPath::new(b""));
200
200
201 if !directory.as_ref().is_empty() {
201 if !directory.as_ref().is_empty() {
202 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
202 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
203 candidates = candidates
203 candidates = candidates
204 .iter()
204 .iter()
205 .filter_map(|c| {
205 .filter_map(|c| {
206 if c.as_bytes().starts_with(&directory) {
206 if c.as_bytes().starts_with(&directory) {
207 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
207 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
208 } else {
208 } else {
209 None
209 None
210 }
210 }
211 })
211 })
212 .collect();
212 .collect();
213 }
213 }
214
214
215 // `self.dirs` includes all of the directories, recursively, so if
215 // `self.dirs` includes all of the directories, recursively, so if
216 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
216 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
217 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
217 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
218 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
218 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
219 // subdir will be in there without a slash.
219 // subdir will be in there without a slash.
220 VisitChildrenSet::Set(
220 VisitChildrenSet::Set(
221 candidates
221 candidates
222 .iter()
222 .iter()
223 .filter_map(|c| {
223 .filter_map(|c| {
224 if c.bytes().all(|b| *b != b'/') {
224 if c.bytes().all(|b| *b != b'/') {
225 Some(*c)
225 Some(*c)
226 } else {
226 } else {
227 None
227 None
228 }
228 }
229 })
229 })
230 .collect(),
230 .collect(),
231 )
231 )
232 }
232 }
233 fn matches_everything(&self) -> bool {
233 fn matches_everything(&self) -> bool {
234 false
234 false
235 }
235 }
236 fn is_exact(&self) -> bool {
236 fn is_exact(&self) -> bool {
237 true
237 true
238 }
238 }
239 }
239 }
240
240
241 /// Matches files that are included in the ignore rules.
241 /// Matches files that are included in the ignore rules.
242 #[cfg_attr(
242 #[cfg_attr(
243 feature = "with-re2",
243 feature = "with-re2",
244 doc = r##"
244 doc = r##"
245 ```
245 ```
246 use hg::{
246 use hg::{
247 matchers::{IncludeMatcher, Matcher},
247 matchers::{IncludeMatcher, Matcher},
248 IgnorePattern,
248 IgnorePattern,
249 PatternSyntax,
249 PatternSyntax,
250 utils::hg_path::HgPath
250 utils::hg_path::HgPath
251 };
251 };
252 use std::path::Path;
252 use std::path::Path;
253 ///
253 ///
254 let ignore_patterns =
254 let ignore_patterns =
255 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
255 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
256 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
256 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
257 ///
257 ///
258 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
258 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
259 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
259 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
260 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
260 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
261 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
261 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
262 ```
262 ```
263 "##
263 "##
264 )]
264 )]
265 pub struct IncludeMatcher<'a> {
265 pub struct IncludeMatcher<'a> {
266 patterns: Vec<u8>,
266 patterns: Vec<u8>,
267 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
267 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
268 /// Whether all the patterns match a prefix (i.e. recursively)
268 /// Whether all the patterns match a prefix (i.e. recursively)
269 prefix: bool,
269 prefix: bool,
270 roots: HashSet<HgPathBuf>,
270 roots: HashSet<HgPathBuf>,
271 dirs: HashSet<HgPathBuf>,
271 dirs: HashSet<HgPathBuf>,
272 parents: HashSet<HgPathBuf>,
272 parents: HashSet<HgPathBuf>,
273 }
273 }
274
274
275 impl<'a> Matcher for IncludeMatcher<'a> {
275 impl<'a> Matcher for IncludeMatcher<'a> {
276 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
276 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
277 None
277 None
278 }
278 }
279
279
280 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
280 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
281 false
281 false
282 }
282 }
283
283
284 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
284 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
285 (self.match_fn)(filename.as_ref())
285 (self.match_fn)(filename.as_ref())
286 }
286 }
287
287
288 fn visit_children_set(
288 fn visit_children_set(
289 &self,
289 &self,
290 directory: impl AsRef<HgPath>,
290 directory: impl AsRef<HgPath>,
291 ) -> VisitChildrenSet {
291 ) -> VisitChildrenSet {
292 let dir = directory.as_ref();
292 let dir = directory.as_ref();
293 if self.prefix && self.roots.contains(dir) {
293 if self.prefix && self.roots.contains(dir) {
294 return VisitChildrenSet::Recursive;
294 return VisitChildrenSet::Recursive;
295 }
295 }
296 if self.roots.contains(HgPath::new(b""))
296 if self.roots.contains(HgPath::new(b""))
297 || self.roots.contains(dir)
297 || self.roots.contains(dir)
298 || self.dirs.contains(dir)
298 || self.dirs.contains(dir)
299 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
299 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
300 {
300 {
301 return VisitChildrenSet::This;
301 return VisitChildrenSet::This;
302 }
302 }
303
303
304 if self.parents.contains(directory.as_ref()) {
304 if self.parents.contains(directory.as_ref()) {
305 let multiset = self.get_all_parents_children();
305 let multiset = self.get_all_parents_children();
306 if let Some(children) = multiset.get(dir) {
306 if let Some(children) = multiset.get(dir) {
307 return VisitChildrenSet::Set(children.to_owned());
307 return VisitChildrenSet::Set(children.to_owned());
308 }
308 }
309 }
309 }
310 VisitChildrenSet::Empty
310 VisitChildrenSet::Empty
311 }
311 }
312
312
313 fn matches_everything(&self) -> bool {
313 fn matches_everything(&self) -> bool {
314 false
314 false
315 }
315 }
316
316
317 fn is_exact(&self) -> bool {
317 fn is_exact(&self) -> bool {
318 false
318 false
319 }
319 }
320 }
320 }
321
321
322 #[cfg(feature = "with-re2")]
322 #[cfg(feature = "with-re2")]
323 /// Returns a function that matches an `HgPath` against the given regex
323 /// Returns a function that matches an `HgPath` against the given regex
324 /// pattern.
324 /// pattern.
325 ///
325 ///
326 /// This can fail when the pattern is invalid or not supported by the
326 /// This can fail when the pattern is invalid or not supported by the
327 /// underlying engine `Re2`, for instance anything with back-references.
327 /// underlying engine `Re2`, for instance anything with back-references.
328 #[timed]
328 #[timed]
329 fn re_matcher(
329 fn re_matcher(
330 pattern: &[u8],
330 pattern: &[u8],
331 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
331 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
332 let regex = Re2::new(pattern);
332 let regex = Re2::new(pattern);
333 let regex = regex.map_err(|e| PatternError::UnsupportedSyntax(e))?;
333 let regex = regex.map_err(|e| PatternError::UnsupportedSyntax(e))?;
334 Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
334 Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
335 }
335 }
336
336
337 #[cfg(not(feature = "with-re2"))]
337 #[cfg(not(feature = "with-re2"))]
338 /// Returns a function that matches an `HgPath` against the given regex
338 /// Returns a function that matches an `HgPath` against the given regex
339 /// pattern.
339 /// pattern.
340 ///
340 ///
341 /// This can fail when the pattern is invalid or not supported by the
341 /// This can fail when the pattern is invalid or not supported by the
342 /// underlying engine (the `regex` crate), for instance anything with
342 /// underlying engine (the `regex` crate), for instance anything with
343 /// back-references.
343 /// back-references.
344 #[timed]
344 #[timed]
345 fn re_matcher(
345 fn re_matcher(
346 pattern: &[u8],
346 pattern: &[u8],
347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
348 use std::io::Write;
348 use std::io::Write;
349
349
350 let mut escaped_bytes = vec![];
350 // The `regex` crate adds `.*` to the start and end of expressions if there
351 // are no anchors, so add the start anchor.
352 let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
351 for byte in pattern {
353 for byte in pattern {
352 if *byte > 127 {
354 if *byte > 127 {
353 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
355 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
354 } else {
356 } else {
355 escaped_bytes.push(*byte);
357 escaped_bytes.push(*byte);
356 }
358 }
357 }
359 }
360 escaped_bytes.push(b')');
358
361
359 // Avoid the cost of UTF8 checking
362 // Avoid the cost of UTF8 checking
360 //
363 //
361 // # Safety
364 // # Safety
362 // This is safe because we escaped all non-ASCII bytes.
365 // This is safe because we escaped all non-ASCII bytes.
363 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
366 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
364 let re = regex::bytes::RegexBuilder::new(&pattern_string)
367 let re = regex::bytes::RegexBuilder::new(&pattern_string)
365 .unicode(false)
368 .unicode(false)
366 .build()
369 .build()
367 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
370 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
368
371
369 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
372 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
370 }
373 }
371
374
372 /// Returns the regex pattern and a function that matches an `HgPath` against
375 /// Returns the regex pattern and a function that matches an `HgPath` against
373 /// said regex formed by the given ignore patterns.
376 /// said regex formed by the given ignore patterns.
374 fn build_regex_match<'a>(
377 fn build_regex_match<'a>(
375 ignore_patterns: &'a [&'a IgnorePattern],
378 ignore_patterns: &'a [&'a IgnorePattern],
376 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
379 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
377 let mut regexps = vec![];
380 let mut regexps = vec![];
378 let mut exact_set = HashSet::new();
381 let mut exact_set = HashSet::new();
379
382
380 for pattern in ignore_patterns {
383 for pattern in ignore_patterns {
381 if let Some(re) = build_single_regex(pattern)? {
384 if let Some(re) = build_single_regex(pattern)? {
382 regexps.push(re);
385 regexps.push(re);
383 } else {
386 } else {
384 let exact = normalize_path_bytes(&pattern.pattern);
387 let exact = normalize_path_bytes(&pattern.pattern);
385 exact_set.insert(HgPathBuf::from_bytes(&exact));
388 exact_set.insert(HgPathBuf::from_bytes(&exact));
386 }
389 }
387 }
390 }
388
391
389 let full_regex = regexps.join(&b'|');
392 let full_regex = regexps.join(&b'|');
390
393
391 // An empty pattern would cause the regex engine to incorrectly match the
394 // An empty pattern would cause the regex engine to incorrectly match the
392 // (empty) root directory
395 // (empty) root directory
393 let func = if !(regexps.is_empty()) {
396 let func = if !(regexps.is_empty()) {
394 let matcher = re_matcher(&full_regex)?;
397 let matcher = re_matcher(&full_regex)?;
395 let func = move |filename: &HgPath| {
398 let func = move |filename: &HgPath| {
396 exact_set.contains(filename) || matcher(filename)
399 exact_set.contains(filename) || matcher(filename)
397 };
400 };
398 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
401 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
399 } else {
402 } else {
400 let func = move |filename: &HgPath| exact_set.contains(filename);
403 let func = move |filename: &HgPath| exact_set.contains(filename);
401 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
404 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
402 };
405 };
403
406
404 Ok((full_regex, func))
407 Ok((full_regex, func))
405 }
408 }
406
409
407 /// Returns roots and directories corresponding to each pattern.
410 /// Returns roots and directories corresponding to each pattern.
408 ///
411 ///
409 /// This calculates the roots and directories exactly matching the patterns and
412 /// This calculates the roots and directories exactly matching the patterns and
410 /// returns a tuple of (roots, dirs). It does not return other directories
413 /// returns a tuple of (roots, dirs). It does not return other directories
411 /// which may also need to be considered, like the parent directories.
414 /// which may also need to be considered, like the parent directories.
412 fn roots_and_dirs(
415 fn roots_and_dirs(
413 ignore_patterns: &[IgnorePattern],
416 ignore_patterns: &[IgnorePattern],
414 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
417 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
415 let mut roots = Vec::new();
418 let mut roots = Vec::new();
416 let mut dirs = Vec::new();
419 let mut dirs = Vec::new();
417
420
418 for ignore_pattern in ignore_patterns {
421 for ignore_pattern in ignore_patterns {
419 let IgnorePattern {
422 let IgnorePattern {
420 syntax, pattern, ..
423 syntax, pattern, ..
421 } = ignore_pattern;
424 } = ignore_pattern;
422 match syntax {
425 match syntax {
423 PatternSyntax::RootGlob | PatternSyntax::Glob => {
426 PatternSyntax::RootGlob | PatternSyntax::Glob => {
424 let mut root = vec![];
427 let mut root = vec![];
425
428
426 for p in pattern.split(|c| *c == b'/') {
429 for p in pattern.split(|c| *c == b'/') {
427 if p.iter().any(|c| match *c {
430 if p.iter().any(|c| match *c {
428 b'[' | b'{' | b'*' | b'?' => true,
431 b'[' | b'{' | b'*' | b'?' => true,
429 _ => false,
432 _ => false,
430 }) {
433 }) {
431 break;
434 break;
432 }
435 }
433 root.push(HgPathBuf::from_bytes(p));
436 root.push(HgPathBuf::from_bytes(p));
434 }
437 }
435 let buf =
438 let buf =
436 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
439 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
437 roots.push(buf);
440 roots.push(buf);
438 }
441 }
439 PatternSyntax::Path | PatternSyntax::RelPath => {
442 PatternSyntax::Path | PatternSyntax::RelPath => {
440 let pat = HgPath::new(if pattern == b"." {
443 let pat = HgPath::new(if pattern == b"." {
441 &[] as &[u8]
444 &[] as &[u8]
442 } else {
445 } else {
443 pattern
446 pattern
444 });
447 });
445 roots.push(pat.to_owned());
448 roots.push(pat.to_owned());
446 }
449 }
447 PatternSyntax::RootFiles => {
450 PatternSyntax::RootFiles => {
448 let pat = if pattern == b"." {
451 let pat = if pattern == b"." {
449 &[] as &[u8]
452 &[] as &[u8]
450 } else {
453 } else {
451 pattern
454 pattern
452 };
455 };
453 dirs.push(HgPathBuf::from_bytes(pat));
456 dirs.push(HgPathBuf::from_bytes(pat));
454 }
457 }
455 _ => {
458 _ => {
456 roots.push(HgPathBuf::new());
459 roots.push(HgPathBuf::new());
457 }
460 }
458 }
461 }
459 }
462 }
460 (roots, dirs)
463 (roots, dirs)
461 }
464 }
462
465
463 /// Paths extracted from patterns
466 /// Paths extracted from patterns
464 #[derive(Debug, PartialEq)]
467 #[derive(Debug, PartialEq)]
465 struct RootsDirsAndParents {
468 struct RootsDirsAndParents {
466 /// Directories to match recursively
469 /// Directories to match recursively
467 pub roots: HashSet<HgPathBuf>,
470 pub roots: HashSet<HgPathBuf>,
468 /// Directories to match non-recursively
471 /// Directories to match non-recursively
469 pub dirs: HashSet<HgPathBuf>,
472 pub dirs: HashSet<HgPathBuf>,
470 /// Implicitly required directories to go to items in either roots or dirs
473 /// Implicitly required directories to go to items in either roots or dirs
471 pub parents: HashSet<HgPathBuf>,
474 pub parents: HashSet<HgPathBuf>,
472 }
475 }
473
476
474 /// Extract roots, dirs and parents from patterns.
477 /// Extract roots, dirs and parents from patterns.
475 fn roots_dirs_and_parents(
478 fn roots_dirs_and_parents(
476 ignore_patterns: &[IgnorePattern],
479 ignore_patterns: &[IgnorePattern],
477 ) -> PatternResult<RootsDirsAndParents> {
480 ) -> PatternResult<RootsDirsAndParents> {
478 let (roots, dirs) = roots_and_dirs(ignore_patterns);
481 let (roots, dirs) = roots_and_dirs(ignore_patterns);
479
482
480 let mut parents = HashSet::new();
483 let mut parents = HashSet::new();
481
484
482 parents.extend(
485 parents.extend(
483 DirsMultiset::from_manifest(&dirs)
486 DirsMultiset::from_manifest(&dirs)
484 .map_err(|e| match e {
487 .map_err(|e| match e {
485 DirstateMapError::InvalidPath(e) => e,
488 DirstateMapError::InvalidPath(e) => e,
486 _ => unreachable!(),
489 _ => unreachable!(),
487 })?
490 })?
488 .iter()
491 .iter()
489 .map(|k| k.to_owned()),
492 .map(|k| k.to_owned()),
490 );
493 );
491 parents.extend(
494 parents.extend(
492 DirsMultiset::from_manifest(&roots)
495 DirsMultiset::from_manifest(&roots)
493 .map_err(|e| match e {
496 .map_err(|e| match e {
494 DirstateMapError::InvalidPath(e) => e,
497 DirstateMapError::InvalidPath(e) => e,
495 _ => unreachable!(),
498 _ => unreachable!(),
496 })?
499 })?
497 .iter()
500 .iter()
498 .map(|k| k.to_owned()),
501 .map(|k| k.to_owned()),
499 );
502 );
500
503
501 Ok(RootsDirsAndParents {
504 Ok(RootsDirsAndParents {
502 roots: HashSet::from_iter(roots),
505 roots: HashSet::from_iter(roots),
503 dirs: HashSet::from_iter(dirs),
506 dirs: HashSet::from_iter(dirs),
504 parents,
507 parents,
505 })
508 })
506 }
509 }
507
510
508 /// Returns a function that checks whether a given file (in the general sense)
511 /// Returns a function that checks whether a given file (in the general sense)
509 /// should be matched.
512 /// should be matched.
510 fn build_match<'a, 'b>(
513 fn build_match<'a, 'b>(
511 ignore_patterns: &'a [IgnorePattern],
514 ignore_patterns: &'a [IgnorePattern],
512 root_dir: impl AsRef<Path>,
515 root_dir: impl AsRef<Path>,
513 ) -> PatternResult<(
516 ) -> PatternResult<(
514 Vec<u8>,
517 Vec<u8>,
515 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
518 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
516 Vec<PatternFileWarning>,
519 Vec<PatternFileWarning>,
517 )> {
520 )> {
518 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
521 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
519 // For debugging and printing
522 // For debugging and printing
520 let mut patterns = vec![];
523 let mut patterns = vec![];
521 let mut all_warnings = vec![];
524 let mut all_warnings = vec![];
522
525
523 let (subincludes, ignore_patterns) =
526 let (subincludes, ignore_patterns) =
524 filter_subincludes(ignore_patterns, root_dir)?;
527 filter_subincludes(ignore_patterns, root_dir)?;
525
528
526 if !subincludes.is_empty() {
529 if !subincludes.is_empty() {
527 // Build prefix-based matcher functions for subincludes
530 // Build prefix-based matcher functions for subincludes
528 let mut submatchers = FastHashMap::default();
531 let mut submatchers = FastHashMap::default();
529 let mut prefixes = vec![];
532 let mut prefixes = vec![];
530
533
531 for SubInclude { prefix, root, path } in subincludes.into_iter() {
534 for SubInclude { prefix, root, path } in subincludes.into_iter() {
532 let (match_fn, warnings) =
535 let (match_fn, warnings) =
533 get_ignore_function(vec![path.to_path_buf()], root)?;
536 get_ignore_function(vec![path.to_path_buf()], root)?;
534 all_warnings.extend(warnings);
537 all_warnings.extend(warnings);
535 prefixes.push(prefix.to_owned());
538 prefixes.push(prefix.to_owned());
536 submatchers.insert(prefix.to_owned(), match_fn);
539 submatchers.insert(prefix.to_owned(), match_fn);
537 }
540 }
538
541
539 let match_subinclude = move |filename: &HgPath| {
542 let match_subinclude = move |filename: &HgPath| {
540 for prefix in prefixes.iter() {
543 for prefix in prefixes.iter() {
541 if let Some(rel) = filename.relative_to(prefix) {
544 if let Some(rel) = filename.relative_to(prefix) {
542 if (submatchers.get(prefix).unwrap())(rel) {
545 if (submatchers.get(prefix).unwrap())(rel) {
543 return true;
546 return true;
544 }
547 }
545 }
548 }
546 }
549 }
547 false
550 false
548 };
551 };
549
552
550 match_funcs.push(Box::new(match_subinclude));
553 match_funcs.push(Box::new(match_subinclude));
551 }
554 }
552
555
553 if !ignore_patterns.is_empty() {
556 if !ignore_patterns.is_empty() {
554 // Either do dumb matching if all patterns are rootfiles, or match
557 // Either do dumb matching if all patterns are rootfiles, or match
555 // with a regex.
558 // with a regex.
556 if ignore_patterns
559 if ignore_patterns
557 .iter()
560 .iter()
558 .all(|k| k.syntax == PatternSyntax::RootFiles)
561 .all(|k| k.syntax == PatternSyntax::RootFiles)
559 {
562 {
560 let dirs: HashSet<_> = ignore_patterns
563 let dirs: HashSet<_> = ignore_patterns
561 .iter()
564 .iter()
562 .map(|k| k.pattern.to_owned())
565 .map(|k| k.pattern.to_owned())
563 .collect();
566 .collect();
564 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
567 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
565
568
566 let match_func = move |path: &HgPath| -> bool {
569 let match_func = move |path: &HgPath| -> bool {
567 let path = path.as_bytes();
570 let path = path.as_bytes();
568 let i = path.iter().rfind(|a| **a == b'/');
571 let i = path.iter().rfind(|a| **a == b'/');
569 let dir = if let Some(i) = i {
572 let dir = if let Some(i) = i {
570 &path[..*i as usize]
573 &path[..*i as usize]
571 } else {
574 } else {
572 b"."
575 b"."
573 };
576 };
574 dirs.contains(dir.deref())
577 dirs.contains(dir.deref())
575 };
578 };
576 match_funcs.push(Box::new(match_func));
579 match_funcs.push(Box::new(match_func));
577
580
578 patterns.extend(b"rootfilesin: ");
581 patterns.extend(b"rootfilesin: ");
579 dirs_vec.sort();
582 dirs_vec.sort();
580 patterns.extend(dirs_vec.escaped_bytes());
583 patterns.extend(dirs_vec.escaped_bytes());
581 } else {
584 } else {
582 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
585 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
583 patterns = new_re;
586 patterns = new_re;
584 match_funcs.push(match_func)
587 match_funcs.push(match_func)
585 }
588 }
586 }
589 }
587
590
588 Ok(if match_funcs.len() == 1 {
591 Ok(if match_funcs.len() == 1 {
589 (patterns, match_funcs.remove(0), all_warnings)
592 (patterns, match_funcs.remove(0), all_warnings)
590 } else {
593 } else {
591 (
594 (
592 patterns,
595 patterns,
593 Box::new(move |f: &HgPath| -> bool {
596 Box::new(move |f: &HgPath| -> bool {
594 match_funcs.iter().any(|match_func| match_func(f))
597 match_funcs.iter().any(|match_func| match_func(f))
595 }),
598 }),
596 all_warnings,
599 all_warnings,
597 )
600 )
598 })
601 })
599 }
602 }
600
603
601 /// Parses all "ignore" files with their recursive includes and returns a
604 /// Parses all "ignore" files with their recursive includes and returns a
602 /// function that checks whether a given file (in the general sense) should be
605 /// function that checks whether a given file (in the general sense) should be
603 /// ignored.
606 /// ignored.
604 pub fn get_ignore_function<'a>(
607 pub fn get_ignore_function<'a>(
605 all_pattern_files: Vec<PathBuf>,
608 all_pattern_files: Vec<PathBuf>,
606 root_dir: impl AsRef<Path>,
609 root_dir: impl AsRef<Path>,
607 ) -> PatternResult<(
610 ) -> PatternResult<(
608 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
611 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
609 Vec<PatternFileWarning>,
612 Vec<PatternFileWarning>,
610 )> {
613 )> {
611 let mut all_patterns = vec![];
614 let mut all_patterns = vec![];
612 let mut all_warnings = vec![];
615 let mut all_warnings = vec![];
613
616
614 for pattern_file in all_pattern_files.into_iter() {
617 for pattern_file in all_pattern_files.into_iter() {
615 let (patterns, warnings) =
618 let (patterns, warnings) =
616 get_patterns_from_file(pattern_file, &root_dir)?;
619 get_patterns_from_file(pattern_file, &root_dir)?;
617
620
618 all_patterns.extend(patterns.to_owned());
621 all_patterns.extend(patterns.to_owned());
619 all_warnings.extend(warnings);
622 all_warnings.extend(warnings);
620 }
623 }
621 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
624 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
622 all_warnings.extend(warnings);
625 all_warnings.extend(warnings);
623 Ok((
626 Ok((
624 Box::new(move |path: &HgPath| matcher.matches(path)),
627 Box::new(move |path: &HgPath| matcher.matches(path)),
625 all_warnings,
628 all_warnings,
626 ))
629 ))
627 }
630 }
628
631
629 impl<'a> IncludeMatcher<'a> {
632 impl<'a> IncludeMatcher<'a> {
630 pub fn new(
633 pub fn new(
631 ignore_patterns: Vec<IgnorePattern>,
634 ignore_patterns: Vec<IgnorePattern>,
632 root_dir: impl AsRef<Path>,
635 root_dir: impl AsRef<Path>,
633 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
636 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
634 let (patterns, match_fn, warnings) =
637 let (patterns, match_fn, warnings) =
635 build_match(&ignore_patterns, root_dir)?;
638 build_match(&ignore_patterns, root_dir)?;
636 let RootsDirsAndParents {
639 let RootsDirsAndParents {
637 roots,
640 roots,
638 dirs,
641 dirs,
639 parents,
642 parents,
640 } = roots_dirs_and_parents(&ignore_patterns)?;
643 } = roots_dirs_and_parents(&ignore_patterns)?;
641
644
642 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
645 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
643 PatternSyntax::Path | PatternSyntax::RelPath => true,
646 PatternSyntax::Path | PatternSyntax::RelPath => true,
644 _ => false,
647 _ => false,
645 });
648 });
646
649
647 Ok((
650 Ok((
648 Self {
651 Self {
649 patterns,
652 patterns,
650 match_fn,
653 match_fn,
651 prefix,
654 prefix,
652 roots,
655 roots,
653 dirs,
656 dirs,
654 parents,
657 parents,
655 },
658 },
656 warnings,
659 warnings,
657 ))
660 ))
658 }
661 }
659
662
660 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
663 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
661 // TODO cache
664 // TODO cache
662 let thing = self
665 let thing = self
663 .dirs
666 .dirs
664 .iter()
667 .iter()
665 .chain(self.roots.iter())
668 .chain(self.roots.iter())
666 .chain(self.parents.iter());
669 .chain(self.parents.iter());
667 DirsChildrenMultiset::new(thing, Some(&self.parents))
670 DirsChildrenMultiset::new(thing, Some(&self.parents))
668 }
671 }
669 }
672 }
670
673
671 impl<'a> Display for IncludeMatcher<'a> {
674 impl<'a> Display for IncludeMatcher<'a> {
672 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
675 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
673 // XXX What about exact matches?
676 // XXX What about exact matches?
674 // I'm not sure it's worth it to clone the HashSet and keep it
677 // I'm not sure it's worth it to clone the HashSet and keep it
675 // around just in case someone wants to display the matcher, plus
678 // around just in case someone wants to display the matcher, plus
676 // it's going to be unreadable after a few entries, but we need to
679 // it's going to be unreadable after a few entries, but we need to
677 // inform in this display that exact matches are being used and are
680 // inform in this display that exact matches are being used and are
678 // (on purpose) missing from the `includes`.
681 // (on purpose) missing from the `includes`.
679 write!(
682 write!(
680 f,
683 f,
681 "IncludeMatcher(includes='{}')",
684 "IncludeMatcher(includes='{}')",
682 String::from_utf8_lossy(&self.patterns.escaped_bytes())
685 String::from_utf8_lossy(&self.patterns.escaped_bytes())
683 )
686 )
684 }
687 }
685 }
688 }
686
689
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Collects raw byte paths into a set of `HgPath` references.
    fn path_set<'a>(names: &[&'a [u8]]) -> HashSet<&'a HgPath> {
        names.iter().map(|&name| HgPath::new(name)).collect()
    }

    /// The glob patterns shared by the roots/dirs/parents tests.
    fn glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    #[test]
    fn test_roots_and_dirs() {
        let pats = glob_patterns();
        let (roots, dirs) = roots_and_dirs(&pats);

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, vec![]);
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let pats = glob_patterns();

        let expected = RootsDirsAndParents {
            roots: vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect(),
            dirs: HashSet::new(),
            parents: vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect(),
        };

        assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Set(path_set(&[b"foo.txt"]))
        );

        // Nothing to visit below the file itself or outside of its parents.
        let unvisited: [&[u8]; 3] =
            [b"dir/subdir/x", b"dir/subdir/foo.txt", b"folder"];
        for dir in unvisited.iter() {
            assert_eq!(
                matcher.visit_children_set(HgPath::new(*dir)),
                VisitChildrenSet::Empty
            );
        }
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPath::new(b"rootfile.txt"),
            HgPath::new(b"a/file1.txt"),
            HgPath::new(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPath::new(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"a", b"rootfile.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            VisitChildrenSet::Set(path_set(&[b"b", b"file1.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            VisitChildrenSet::Set(path_set(&[b"c", b"file2.txt"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            VisitChildrenSet::Set(path_set(&[b"d"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            VisitChildrenSet::Set(path_set(&[b"file4.txt"]))
        );

        let unvisited: [&[u8]; 2] = [b"a/b/c/d/e", b"folder"];
        for dir in unvisited.iter() {
            assert_eq!(
                matcher.visit_children_set(HgPath::new(*dir)),
                VisitChildrenSet::Empty
            );
        }
    }

    #[cfg(feature = "with-re2")]
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let pattern = IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) = IncludeMatcher::new(vec![pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let pattern = IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) = IncludeMatcher::new(vec![pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(path_set(&[b"subdir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let pattern =
            IgnorePattern::new(PatternSyntax::Glob, b"dir/z*", Path::new(""));
        let (matcher, _) = IncludeMatcher::new(vec![pattern], "").unwrap();

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(path_set(&[b"dir"]))
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now