##// END OF EJS Templates
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
Raphaël Gomès -
r45311:e0414fcd default
parent child Browse files
Show More
@@ -1,665 +1,660 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::fs::File;
20 use std::fs::File;
21 use std::io::Read;
21 use std::io::Read;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24 use std::vec::Vec;
24 use std::vec::Vec;
25
25
26 lazy_static! {
26 lazy_static! {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 for byte in to_escape {
30 for byte in to_escape {
31 v[*byte as usize].insert(0, b'\\');
31 v[*byte as usize].insert(0, b'\\');
32 }
32 }
33 v
33 v
34 };
34 };
35 }
35 }
36
36
37 /// These are matched in order
37 /// These are matched in order
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
38 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
39 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
40
40
41 /// Appended to the regexp of globs
41 /// Appended to the regexp of globs
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
42 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43
43
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
44 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
45 pub enum PatternSyntax {
45 pub enum PatternSyntax {
46 /// A regular expression
46 /// A regular expression
47 Regexp,
47 Regexp,
48 /// Glob that matches at the front of the path
48 /// Glob that matches at the front of the path
49 RootGlob,
49 RootGlob,
50 /// Glob that matches at any suffix of the path (still anchored at
50 /// Glob that matches at any suffix of the path (still anchored at
51 /// slashes)
51 /// slashes)
52 Glob,
52 Glob,
53 /// a path relative to repository root, which is matched recursively
53 /// a path relative to repository root, which is matched recursively
54 Path,
54 Path,
55 /// A path relative to cwd
55 /// A path relative to cwd
56 RelPath,
56 RelPath,
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
57 /// an unrooted glob (*.rs matches Rust files in all dirs)
58 RelGlob,
58 RelGlob,
59 /// A regexp that needn't match the start of a name
59 /// A regexp that needn't match the start of a name
60 RelRegexp,
60 RelRegexp,
61 /// A path relative to repository root, which is matched non-recursively
61 /// A path relative to repository root, which is matched non-recursively
62 /// (will not match subdirectories)
62 /// (will not match subdirectories)
63 RootFiles,
63 RootFiles,
64 /// A file of patterns to read and include
64 /// A file of patterns to read and include
65 Include,
65 Include,
66 /// A file of patterns to match against files under the same directory
66 /// A file of patterns to match against files under the same directory
67 SubInclude,
67 SubInclude,
68 }
68 }
69
69
70 /// Transforms a glob pattern into a regex
70 /// Transforms a glob pattern into a regex
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 let mut input = pat;
72 let mut input = pat;
73 let mut res: Vec<u8> = vec![];
73 let mut res: Vec<u8> = vec![];
74 let mut group_depth = 0;
74 let mut group_depth = 0;
75
75
76 while let Some((c, rest)) = input.split_first() {
76 while let Some((c, rest)) = input.split_first() {
77 input = rest;
77 input = rest;
78
78
79 match c {
79 match c {
80 b'*' => {
80 b'*' => {
81 for (source, repl) in GLOB_REPLACEMENTS {
81 for (source, repl) in GLOB_REPLACEMENTS {
82 if let Some(rest) = input.drop_prefix(source) {
82 if let Some(rest) = input.drop_prefix(source) {
83 input = rest;
83 input = rest;
84 res.extend(*repl);
84 res.extend(*repl);
85 break;
85 break;
86 }
86 }
87 }
87 }
88 }
88 }
89 b'?' => res.extend(b"."),
89 b'?' => res.extend(b"."),
90 b'[' => {
90 b'[' => {
91 match input.iter().skip(1).position(|b| *b == b']') {
91 match input.iter().skip(1).position(|b| *b == b']') {
92 None => res.extend(b"\\["),
92 None => res.extend(b"\\["),
93 Some(end) => {
93 Some(end) => {
94 // Account for the one we skipped
94 // Account for the one we skipped
95 let end = end + 1;
95 let end = end + 1;
96
96
97 res.extend(b"[");
97 res.extend(b"[");
98
98
99 for (i, b) in input[..end].iter().enumerate() {
99 for (i, b) in input[..end].iter().enumerate() {
100 if *b == b'!' && i == 0 {
100 if *b == b'!' && i == 0 {
101 res.extend(b"^")
101 res.extend(b"^")
102 } else if *b == b'^' && i == 0 {
102 } else if *b == b'^' && i == 0 {
103 res.extend(b"\\^")
103 res.extend(b"\\^")
104 } else if *b == b'\\' {
104 } else if *b == b'\\' {
105 res.extend(b"\\\\")
105 res.extend(b"\\\\")
106 } else {
106 } else {
107 res.push(*b)
107 res.push(*b)
108 }
108 }
109 }
109 }
110 res.extend(b"]");
110 res.extend(b"]");
111 input = &input[end + 1..];
111 input = &input[end + 1..];
112 }
112 }
113 }
113 }
114 }
114 }
115 b'{' => {
115 b'{' => {
116 group_depth += 1;
116 group_depth += 1;
117 res.extend(b"(?:")
117 res.extend(b"(?:")
118 }
118 }
119 b'}' if group_depth > 0 => {
119 b'}' if group_depth > 0 => {
120 group_depth -= 1;
120 group_depth -= 1;
121 res.extend(b")");
121 res.extend(b")");
122 }
122 }
123 b',' if group_depth > 0 => res.extend(b"|"),
123 b',' if group_depth > 0 => res.extend(b"|"),
124 b'\\' => {
124 b'\\' => {
125 let c = {
125 let c = {
126 if let Some((c, rest)) = input.split_first() {
126 if let Some((c, rest)) = input.split_first() {
127 input = rest;
127 input = rest;
128 c
128 c
129 } else {
129 } else {
130 c
130 c
131 }
131 }
132 };
132 };
133 res.extend(&RE_ESCAPE[*c as usize])
133 res.extend(&RE_ESCAPE[*c as usize])
134 }
134 }
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 }
136 }
137 }
137 }
138 res
138 res
139 }
139 }
140
140
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 pattern
142 pattern
143 .iter()
143 .iter()
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 .collect()
145 .collect()
146 }
146 }
147
147
148 pub fn parse_pattern_syntax(
148 pub fn parse_pattern_syntax(
149 kind: &[u8],
149 kind: &[u8],
150 ) -> Result<PatternSyntax, PatternError> {
150 ) -> Result<PatternSyntax, PatternError> {
151 match kind {
151 match kind {
152 b"re:" => Ok(PatternSyntax::Regexp),
152 b"re:" => Ok(PatternSyntax::Regexp),
153 b"path:" => Ok(PatternSyntax::Path),
153 b"path:" => Ok(PatternSyntax::Path),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 b"glob:" => Ok(PatternSyntax::Glob),
158 b"glob:" => Ok(PatternSyntax::Glob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 b"include:" => Ok(PatternSyntax::Include),
160 b"include:" => Ok(PatternSyntax::Include),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 _ => Err(PatternError::UnsupportedSyntax(
162 _ => Err(PatternError::UnsupportedSyntax(
163 String::from_utf8_lossy(kind).to_string(),
163 String::from_utf8_lossy(kind).to_string(),
164 )),
164 )),
165 }
165 }
166 }
166 }
167
167
168 /// Builds the regex that corresponds to the given pattern.
168 /// Builds the regex that corresponds to the given pattern.
169 /// If within a `syntax: regexp` context, returns the pattern,
169 /// If within a `syntax: regexp` context, returns the pattern,
170 /// otherwise, returns the corresponding regex.
170 /// otherwise, returns the corresponding regex.
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 let IgnorePattern {
172 let IgnorePattern {
173 syntax, pattern, ..
173 syntax, pattern, ..
174 } = entry;
174 } = entry;
175 if pattern.is_empty() {
175 if pattern.is_empty() {
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 // The `regex` crate adds `.*` to the start and end of expressions
179 // The `regex` crate adds `.*` to the start and end of expressions
180 // if there are no anchors, so add them.
180 // if there are no anchors, so add them.
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
181 PatternSyntax::Regexp => [b"^", &pattern[..], b"$"].concat(),
182 PatternSyntax::RelRegexp => {
182 PatternSyntax::RelRegexp => {
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
183 // The `regex` crate accepts `**` while `re2` and Python's `re`
184 // do not. Checking for `*` correctly triggers the same error in all
184 // do not. Checking for `*` correctly triggers the same error in all
185 // engines.
185 // engines.
186 if pattern[0] == b'^' || pattern[0] == b'*' {
186 if pattern[0] == b'^' || pattern[0] == b'*' {
187 return pattern.to_owned();
187 return pattern.to_owned();
188 }
188 }
189 [&b".*"[..], pattern].concat()
189 [&b".*"[..], pattern].concat()
190 }
190 }
191 PatternSyntax::Path | PatternSyntax::RelPath => {
191 PatternSyntax::Path | PatternSyntax::RelPath => {
192 if pattern == b"." {
192 if pattern == b"." {
193 return vec![];
193 return vec![];
194 }
194 }
195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
195 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 }
196 }
197 PatternSyntax::RootFiles => {
197 PatternSyntax::RootFiles => {
198 let mut res = if pattern == b"." {
198 let mut res = if pattern == b"." {
199 vec![b'^']
199 vec![b'^']
200 } else {
200 } else {
201 // Pattern is a directory name.
201 // Pattern is a directory name.
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
202 [b"^", escape_pattern(pattern).as_slice(), b"/"].concat()
203 };
203 };
204
204
205 // Anything after the pattern must be a non-directory.
205 // Anything after the pattern must be a non-directory.
206 res.extend(b"[^/]+$");
206 res.extend(b"[^/]+$");
207 res.push(b'$');
207 res.push(b'$');
208 res
208 res
209 }
209 }
210 PatternSyntax::RelGlob => {
210 PatternSyntax::RelGlob => {
211 let glob_re = glob_to_re(pattern);
211 let glob_re = glob_to_re(pattern);
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
213 [b".*", rest, GLOB_SUFFIX].concat()
213 [b".*", rest, GLOB_SUFFIX].concat()
214 } else {
214 } else {
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 }
216 }
217 }
217 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [b"^", glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 }
220 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 }
222 }
223 }
223 }
224
224
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
225 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
226 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227
227
228 /// TODO support other platforms
228 /// TODO support other platforms
229 #[cfg(unix)]
229 #[cfg(unix)]
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
230 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
231 if bytes.is_empty() {
231 if bytes.is_empty() {
232 return b".".to_vec();
232 return b".".to_vec();
233 }
233 }
234 let sep = b'/';
234 let sep = b'/';
235
235
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
236 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
237 if initial_slashes > 2 {
237 if initial_slashes > 2 {
238 // POSIX allows one or two initial slashes, but treats three or more
238 // POSIX allows one or two initial slashes, but treats three or more
239 // as single slash.
239 // as single slash.
240 initial_slashes = 1;
240 initial_slashes = 1;
241 }
241 }
242 let components = bytes
242 let components = bytes
243 .split(|b| *b == sep)
243 .split(|b| *b == sep)
244 .filter(|c| !(c.is_empty() || c == b"."))
244 .filter(|c| !(c.is_empty() || c == b"."))
245 .fold(vec![], |mut acc, component| {
245 .fold(vec![], |mut acc, component| {
246 if component != b".."
246 if component != b".."
247 || (initial_slashes == 0 && acc.is_empty())
247 || (initial_slashes == 0 && acc.is_empty())
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
248 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
249 {
249 {
250 acc.push(component)
250 acc.push(component)
251 } else if !acc.is_empty() {
251 } else if !acc.is_empty() {
252 acc.pop();
252 acc.pop();
253 }
253 }
254 acc
254 acc
255 });
255 });
256 let mut new_bytes = components.join(&sep);
256 let mut new_bytes = components.join(&sep);
257
257
258 if initial_slashes > 0 {
258 if initial_slashes > 0 {
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
259 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
260 buf.extend(new_bytes);
260 buf.extend(new_bytes);
261 new_bytes = buf;
261 new_bytes = buf;
262 }
262 }
263 if new_bytes.is_empty() {
263 if new_bytes.is_empty() {
264 b".".to_vec()
264 b".".to_vec()
265 } else {
265 } else {
266 new_bytes
266 new_bytes
267 }
267 }
268 }
268 }
269
269
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 /// that don't need to be transformed into a regex.
271 /// that don't need to be transformed into a regex.
272 pub fn build_single_regex(
272 pub fn build_single_regex(
273 entry: &IgnorePattern,
273 entry: &IgnorePattern,
274 ) -> Result<Vec<u8>, PatternError> {
274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 let IgnorePattern {
275 let IgnorePattern {
276 pattern, syntax, ..
276 pattern, syntax, ..
277 } = entry;
277 } = entry;
278 let pattern = match syntax {
278 let pattern = match syntax {
279 PatternSyntax::RootGlob
279 PatternSyntax::RootGlob
280 | PatternSyntax::Path
280 | PatternSyntax::Path
281 | PatternSyntax::RelGlob
281 | PatternSyntax::RelGlob
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 return Err(PatternError::NonRegexPattern(entry.clone()))
284 return Err(PatternError::NonRegexPattern(entry.clone()))
285 }
285 }
286 _ => pattern.to_owned(),
286 _ => pattern.to_owned(),
287 };
287 };
288 if *syntax == PatternSyntax::RootGlob
288 if *syntax == PatternSyntax::RootGlob
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 {
290 {
291 // The `regex` crate adds `.*` to the start and end of expressions
291 Ok(None)
292 // if there are no anchors, so add the start anchor.
293 let mut escaped = vec![b'^'];
294 escaped.extend(escape_pattern(&pattern));
295 escaped.extend(GLOB_SUFFIX);
296 Ok(escaped)
297 } else {
292 } else {
298 let mut entry = entry.clone();
293 let mut entry = entry.clone();
299 entry.pattern = pattern;
294 entry.pattern = pattern;
300 Ok(_build_single_regex(&entry))
295 Ok(Some(_build_single_regex(&entry)))
301 }
296 }
302 }
297 }
303
298
304 lazy_static! {
299 lazy_static! {
305 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
306 let mut m = FastHashMap::default();
301 let mut m = FastHashMap::default();
307
302
308 m.insert(b"re".as_ref(), b"relre:".as_ref());
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
309 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
310 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
311 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
312 m.insert(b"include".as_ref(), b"include:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
313 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
314 m
309 m
315 };
310 };
316 }
311 }
317
312
318 #[derive(Debug)]
313 #[derive(Debug)]
319 pub enum PatternFileWarning {
314 pub enum PatternFileWarning {
320 /// (file path, syntax bytes)
315 /// (file path, syntax bytes)
321 InvalidSyntax(PathBuf, Vec<u8>),
316 InvalidSyntax(PathBuf, Vec<u8>),
322 /// File path
317 /// File path
323 NoSuchFile(PathBuf),
318 NoSuchFile(PathBuf),
324 }
319 }
325
320
326 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
327 lines: &[u8],
322 lines: &[u8],
328 file_path: P,
323 file_path: P,
329 warn: bool,
324 warn: bool,
330 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
331 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
332 let comment_escape_regex = Regex::new(r"\\#").unwrap();
327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
333 let mut inputs: Vec<IgnorePattern> = vec![];
328 let mut inputs: Vec<IgnorePattern> = vec![];
334 let mut warnings: Vec<PatternFileWarning> = vec![];
329 let mut warnings: Vec<PatternFileWarning> = vec![];
335
330
336 let mut current_syntax = b"relre:".as_ref();
331 let mut current_syntax = b"relre:".as_ref();
337
332
338 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
339 let line_number = line_number + 1;
334 let line_number = line_number + 1;
340
335
341 let line_buf;
336 let line_buf;
342 if line.contains(&b'#') {
337 if line.contains(&b'#') {
343 if let Some(cap) = comment_regex.captures(line) {
338 if let Some(cap) = comment_regex.captures(line) {
344 line = &line[..cap.get(1).unwrap().end()]
339 line = &line[..cap.get(1).unwrap().end()]
345 }
340 }
346 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
347 line = &line_buf;
342 line = &line_buf;
348 }
343 }
349
344
350 let mut line = line.trim_end();
345 let mut line = line.trim_end();
351
346
352 if line.is_empty() {
347 if line.is_empty() {
353 continue;
348 continue;
354 }
349 }
355
350
356 if let Some(syntax) = line.drop_prefix(b"syntax:") {
351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
357 let syntax = syntax.trim();
352 let syntax = syntax.trim();
358
353
359 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
360 current_syntax = rel_syntax;
355 current_syntax = rel_syntax;
361 } else if warn {
356 } else if warn {
362 warnings.push(PatternFileWarning::InvalidSyntax(
357 warnings.push(PatternFileWarning::InvalidSyntax(
363 file_path.as_ref().to_owned(),
358 file_path.as_ref().to_owned(),
364 syntax.to_owned(),
359 syntax.to_owned(),
365 ));
360 ));
366 }
361 }
367 continue;
362 continue;
368 }
363 }
369
364
370 let mut line_syntax: &[u8] = &current_syntax;
365 let mut line_syntax: &[u8] = &current_syntax;
371
366
372 for (s, rels) in SYNTAXES.iter() {
367 for (s, rels) in SYNTAXES.iter() {
373 if let Some(rest) = line.drop_prefix(rels) {
368 if let Some(rest) = line.drop_prefix(rels) {
374 line_syntax = rels;
369 line_syntax = rels;
375 line = rest;
370 line = rest;
376 break;
371 break;
377 }
372 }
378 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
379 line_syntax = rels;
374 line_syntax = rels;
380 line = rest;
375 line = rest;
381 break;
376 break;
382 }
377 }
383 }
378 }
384
379
385 inputs.push(IgnorePattern::new(
380 inputs.push(IgnorePattern::new(
386 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
387 PatternError::UnsupportedSyntax(syntax) => {
382 PatternError::UnsupportedSyntax(syntax) => {
388 PatternError::UnsupportedSyntaxInFile(
383 PatternError::UnsupportedSyntaxInFile(
389 syntax,
384 syntax,
390 file_path.as_ref().to_string_lossy().into(),
385 file_path.as_ref().to_string_lossy().into(),
391 line_number,
386 line_number,
392 )
387 )
393 }
388 }
394 _ => e,
389 _ => e,
395 })?,
390 })?,
396 &line,
391 &line,
397 &file_path,
392 &file_path,
398 ));
393 ));
399 }
394 }
400 Ok((inputs, warnings))
395 Ok((inputs, warnings))
401 }
396 }
402
397
403 pub fn read_pattern_file<P: AsRef<Path>>(
398 pub fn read_pattern_file<P: AsRef<Path>>(
404 file_path: P,
399 file_path: P,
405 warn: bool,
400 warn: bool,
406 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
407 let mut f = match File::open(file_path.as_ref()) {
402 let mut f = match File::open(file_path.as_ref()) {
408 Ok(f) => Ok(f),
403 Ok(f) => Ok(f),
409 Err(e) => match e.kind() {
404 Err(e) => match e.kind() {
410 std::io::ErrorKind::NotFound => {
405 std::io::ErrorKind::NotFound => {
411 return Ok((
406 return Ok((
412 vec![],
407 vec![],
413 vec![PatternFileWarning::NoSuchFile(
408 vec![PatternFileWarning::NoSuchFile(
414 file_path.as_ref().to_owned(),
409 file_path.as_ref().to_owned(),
415 )],
410 )],
416 ))
411 ))
417 }
412 }
418 _ => Err(e),
413 _ => Err(e),
419 },
414 },
420 }?;
415 }?;
421 let mut contents = Vec::new();
416 let mut contents = Vec::new();
422
417
423 f.read_to_end(&mut contents)?;
418 f.read_to_end(&mut contents)?;
424
419
425 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
426 }
421 }
427
422
428 /// Represents an entry in an "ignore" file.
423 /// Represents an entry in an "ignore" file.
429 #[derive(Debug, Eq, PartialEq, Clone)]
424 #[derive(Debug, Eq, PartialEq, Clone)]
430 pub struct IgnorePattern {
425 pub struct IgnorePattern {
431 pub syntax: PatternSyntax,
426 pub syntax: PatternSyntax,
432 pub pattern: Vec<u8>,
427 pub pattern: Vec<u8>,
433 pub source: PathBuf,
428 pub source: PathBuf,
434 }
429 }
435
430
436 impl IgnorePattern {
431 impl IgnorePattern {
437 pub fn new(
432 pub fn new(
438 syntax: PatternSyntax,
433 syntax: PatternSyntax,
439 pattern: &[u8],
434 pattern: &[u8],
440 source: impl AsRef<Path>,
435 source: impl AsRef<Path>,
441 ) -> Self {
436 ) -> Self {
442 Self {
437 Self {
443 syntax,
438 syntax,
444 pattern: pattern.to_owned(),
439 pattern: pattern.to_owned(),
445 source: source.as_ref().to_owned(),
440 source: source.as_ref().to_owned(),
446 }
441 }
447 }
442 }
448 }
443 }
449
444
450 pub type PatternResult<T> = Result<T, PatternError>;
445 pub type PatternResult<T> = Result<T, PatternError>;
451
446
452 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
453 /// patterns.
448 /// patterns.
454 ///
449 ///
455 /// `subinclude:` is not treated as a special pattern here: unraveling them
450 /// `subinclude:` is not treated as a special pattern here: unraveling them
456 /// needs to occur in the "ignore" phase.
451 /// needs to occur in the "ignore" phase.
457 pub fn get_patterns_from_file(
452 pub fn get_patterns_from_file(
458 pattern_file: impl AsRef<Path>,
453 pattern_file: impl AsRef<Path>,
459 root_dir: impl AsRef<Path>,
454 root_dir: impl AsRef<Path>,
460 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
461 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
462 let patterns = patterns
457 let patterns = patterns
463 .into_iter()
458 .into_iter()
464 .flat_map(|entry| -> PatternResult<_> {
459 .flat_map(|entry| -> PatternResult<_> {
465 let IgnorePattern {
460 let IgnorePattern {
466 syntax,
461 syntax,
467 pattern,
462 pattern,
468 source: _,
463 source: _,
469 } = &entry;
464 } = &entry;
470 Ok(match syntax {
465 Ok(match syntax {
471 PatternSyntax::Include => {
466 PatternSyntax::Include => {
472 let inner_include =
467 let inner_include =
473 root_dir.as_ref().join(get_path_from_bytes(&pattern));
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
474 let (inner_pats, inner_warnings) = get_patterns_from_file(
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
475 &inner_include,
470 &inner_include,
476 root_dir.as_ref(),
471 root_dir.as_ref(),
477 )?;
472 )?;
478 warnings.extend(inner_warnings);
473 warnings.extend(inner_warnings);
479 inner_pats
474 inner_pats
480 }
475 }
481 _ => vec![entry],
476 _ => vec![entry],
482 })
477 })
483 })
478 })
484 .flatten()
479 .flatten()
485 .collect();
480 .collect();
486
481
487 Ok((patterns, warnings))
482 Ok((patterns, warnings))
488 }
483 }
489
484
490 /// Holds all the information needed to handle a `subinclude:` pattern.
485 /// Holds all the information needed to handle a `subinclude:` pattern.
491 pub struct SubInclude {
486 pub struct SubInclude {
492 /// Will be used for repository (hg) paths that start with this prefix.
487 /// Will be used for repository (hg) paths that start with this prefix.
493 /// It is relative to the current working directory, so comparing against
488 /// It is relative to the current working directory, so comparing against
494 /// repository paths is painless.
489 /// repository paths is painless.
495 pub prefix: HgPathBuf,
490 pub prefix: HgPathBuf,
496 /// The file itself, containing the patterns
491 /// The file itself, containing the patterns
497 pub path: PathBuf,
492 pub path: PathBuf,
498 /// Folder in the filesystem where this it applies
498 /// Folder in the filesystem where it applies
493 /// Folder in the filesystem where it applies
494 pub root: PathBuf,
500 }
495 }
501
496
502 impl SubInclude {
497 impl SubInclude {
503 pub fn new(
498 pub fn new(
504 root_dir: impl AsRef<Path>,
499 root_dir: impl AsRef<Path>,
505 pattern: &[u8],
500 pattern: &[u8],
506 source: impl AsRef<Path>,
501 source: impl AsRef<Path>,
507 ) -> Result<SubInclude, HgPathError> {
502 ) -> Result<SubInclude, HgPathError> {
508 let normalized_source =
503 let normalized_source =
509 normalize_path_bytes(&get_bytes_from_path(source));
504 normalize_path_bytes(&get_bytes_from_path(source));
510
505
511 let source_root = get_path_from_bytes(&normalized_source);
506 let source_root = get_path_from_bytes(&normalized_source);
512 let source_root = source_root.parent().unwrap_or(source_root.deref());
507 let source_root = source_root.parent().unwrap_or(source_root.deref());
513
508
514 let path = source_root.join(get_path_from_bytes(pattern));
509 let path = source_root.join(get_path_from_bytes(pattern));
515 let new_root = path.parent().unwrap_or(path.deref());
510 let new_root = path.parent().unwrap_or(path.deref());
516
511
517 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
518
513
519 Ok(Self {
514 Ok(Self {
520 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
521 if !p.is_empty() {
516 if !p.is_empty() {
522 p.push(b'/');
517 p.push(b'/');
523 }
518 }
524 Ok(p)
519 Ok(p)
525 })?,
520 })?,
526 path: path.to_owned(),
521 path: path.to_owned(),
527 root: new_root.to_owned(),
522 root: new_root.to_owned(),
528 })
523 })
529 }
524 }
530 }
525 }
531
526
532 /// Separate and pre-process subincludes from other patterns for the "ignore"
527 /// Separate and pre-process subincludes from other patterns for the "ignore"
533 /// phase.
528 /// phase.
534 pub fn filter_subincludes(
529 pub fn filter_subincludes(
535 ignore_patterns: &[IgnorePattern],
530 ignore_patterns: &[IgnorePattern],
536 root_dir: impl AsRef<Path>,
531 root_dir: impl AsRef<Path>,
537 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
538 let mut subincludes = vec![];
533 let mut subincludes = vec![];
539 let mut others = vec![];
534 let mut others = vec![];
540
535
541 for ignore_pattern in ignore_patterns.iter() {
536 for ignore_pattern in ignore_patterns.iter() {
542 let IgnorePattern {
537 let IgnorePattern {
543 syntax,
538 syntax,
544 pattern,
539 pattern,
545 source,
540 source,
546 } = ignore_pattern;
541 } = ignore_pattern;
547 if *syntax == PatternSyntax::SubInclude {
542 if *syntax == PatternSyntax::SubInclude {
548 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
549 } else {
544 } else {
550 others.push(ignore_pattern)
545 others.push(ignore_pattern)
551 }
546 }
552 }
547 }
553 Ok((subincludes, others))
548 Ok((subincludes, others))
554 }
549 }
555
550
556 #[cfg(test)]
551 #[cfg(test)]
557 mod tests {
552 mod tests {
558 use super::*;
553 use super::*;
559 use pretty_assertions::assert_eq;
554 use pretty_assertions::assert_eq;
560
555
561 #[test]
556 #[test]
562 fn escape_pattern_test() {
557 fn escape_pattern_test() {
563 let untouched =
558 let untouched =
564 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
559 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
565 assert_eq!(escape_pattern(untouched), untouched.to_vec());
560 assert_eq!(escape_pattern(untouched), untouched.to_vec());
566 // All escape codes
561 // All escape codes
567 assert_eq!(
562 assert_eq!(
568 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
563 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
569 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
564 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
570 .to_vec()
565 .to_vec()
571 );
566 );
572 }
567 }
573
568
574 #[test]
569 #[test]
575 fn glob_test() {
570 fn glob_test() {
576 assert_eq!(glob_to_re(br#"?"#), br#"."#);
571 assert_eq!(glob_to_re(br#"?"#), br#"."#);
577 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
572 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
578 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
573 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
579 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
574 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
580 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
575 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
581 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
576 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
582 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
577 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
583 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
578 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
584 }
579 }
585
580
586 #[test]
581 #[test]
587 fn test_parse_pattern_file_contents() {
582 fn test_parse_pattern_file_contents() {
588 let lines = b"syntax: glob\n*.elc";
583 let lines = b"syntax: glob\n*.elc";
589
584
590 assert_eq!(
585 assert_eq!(
591 parse_pattern_file_contents(lines, Path::new("file_path"), false)
586 parse_pattern_file_contents(lines, Path::new("file_path"), false)
592 .unwrap()
587 .unwrap()
593 .0,
588 .0,
594 vec![IgnorePattern::new(
589 vec![IgnorePattern::new(
595 PatternSyntax::RelGlob,
590 PatternSyntax::RelGlob,
596 b"*.elc",
591 b"*.elc",
597 Path::new("file_path")
592 Path::new("file_path")
598 )],
593 )],
599 );
594 );
600
595
601 let lines = b"syntax: include\nsyntax: glob";
596 let lines = b"syntax: include\nsyntax: glob";
602
597
603 assert_eq!(
598 assert_eq!(
604 parse_pattern_file_contents(lines, Path::new("file_path"), false)
599 parse_pattern_file_contents(lines, Path::new("file_path"), false)
605 .unwrap()
600 .unwrap()
606 .0,
601 .0,
607 vec![]
602 vec![]
608 );
603 );
609 let lines = b"glob:**.o";
604 let lines = b"glob:**.o";
610 assert_eq!(
605 assert_eq!(
611 parse_pattern_file_contents(lines, Path::new("file_path"), false)
606 parse_pattern_file_contents(lines, Path::new("file_path"), false)
612 .unwrap()
607 .unwrap()
613 .0,
608 .0,
614 vec![IgnorePattern::new(
609 vec![IgnorePattern::new(
615 PatternSyntax::RelGlob,
610 PatternSyntax::RelGlob,
616 b"**.o",
611 b"**.o",
617 Path::new("file_path")
612 Path::new("file_path")
618 )]
613 )]
619 );
614 );
620 }
615 }
621
616
622 #[test]
617 #[test]
623 fn test_build_single_regex() {
618 fn test_build_single_regex() {
624 assert_eq!(
619 assert_eq!(
625 build_single_regex(&IgnorePattern::new(
620 build_single_regex(&IgnorePattern::new(
626 PatternSyntax::RelGlob,
621 PatternSyntax::RelGlob,
627 b"rust/target/",
622 b"rust/target/",
628 Path::new("")
623 Path::new("")
629 ))
624 ))
630 .unwrap(),
625 .unwrap(),
631 br"(?:.*/)?rust/target(?:/|$)".to_vec(),
626 Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
632 );
627 );
633 }
628 }
634
629
635 #[test]
630 #[test]
636 fn test_build_single_regex_shortcut() {
631 fn test_build_single_regex_shortcut() {
637 assert_eq!(
632 assert_eq!(
638 build_single_regex(&IgnorePattern::new(
633 build_single_regex(&IgnorePattern::new(
639 PatternSyntax::RootGlob,
634 PatternSyntax::RootGlob,
640 b"",
635 b"",
641 Path::new("")
636 Path::new("")
642 ))
637 ))
643 .unwrap(),
638 .unwrap(),
644 br"^\.(?:/|$)".to_vec(),
639 None,
645 );
640 );
646 assert_eq!(
641 assert_eq!(
647 build_single_regex(&IgnorePattern::new(
642 build_single_regex(&IgnorePattern::new(
648 PatternSyntax::RootGlob,
643 PatternSyntax::RootGlob,
649 b"whatever",
644 b"whatever",
650 Path::new("")
645 Path::new("")
651 ))
646 ))
652 .unwrap(),
647 .unwrap(),
653 br"^whatever(?:/|$)".to_vec(),
648 None,
654 );
649 );
655 assert_eq!(
650 assert_eq!(
656 build_single_regex(&IgnorePattern::new(
651 build_single_regex(&IgnorePattern::new(
657 PatternSyntax::RootGlob,
652 PatternSyntax::RootGlob,
658 b"*.o",
653 b"*.o",
659 Path::new("")
654 Path::new("")
660 ))
655 ))
661 .unwrap(),
656 .unwrap(),
662 br"^[^/]*\.o(?:/|$)".to_vec(),
657 Some(br"^[^/]*\.o(?:/|$)".to_vec()),
663 );
658 );
664 }
659 }
665 }
660 }
@@ -1,930 +1,948 b''
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 #[cfg(feature = "with-re2")]
10 #[cfg(feature = "with-re2")]
11 use crate::re2::Re2;
11 use crate::re2::Re2;
12 use crate::{
12 use crate::{
13 dirstate::dirs_multiset::DirsChildrenMultiset,
13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 filepatterns::{
14 filepatterns::{
15 build_single_regex, filter_subincludes, get_patterns_from_file,
15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 PatternFileWarning, PatternResult, SubInclude,
16 PatternFileWarning, PatternResult, SubInclude,
17 },
17 },
18 utils::{
18 utils::{
19 files::find_dirs,
19 files::find_dirs,
20 hg_path::{HgPath, HgPathBuf},
20 hg_path::{HgPath, HgPathBuf},
21 Escaped,
21 Escaped,
22 },
22 },
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 PatternSyntax,
24 PatternSyntax,
25 };
25 };
26
26
27 use crate::filepatterns::normalize_path_bytes;
27 use std::borrow::ToOwned;
28 use std::borrow::ToOwned;
28 use std::collections::HashSet;
29 use std::collections::HashSet;
29 use std::fmt::{Display, Error, Formatter};
30 use std::fmt::{Display, Error, Formatter};
30 use std::iter::FromIterator;
31 use std::iter::FromIterator;
31 use std::ops::Deref;
32 use std::ops::Deref;
32 use std::path::{Path, PathBuf};
33 use std::path::{Path, PathBuf};
33
34
34 use micro_timer::timed;
35 use micro_timer::timed;
35
36
36 #[derive(Debug, PartialEq)]
37 #[derive(Debug, PartialEq)]
37 pub enum VisitChildrenSet<'a> {
38 pub enum VisitChildrenSet<'a> {
38 /// Don't visit anything
39 /// Don't visit anything
39 Empty,
40 Empty,
40 /// Only visit this directory
41 /// Only visit this directory
41 This,
42 This,
42 /// Visit this directory and these subdirectories
43 /// Visit this directory and these subdirectories
43 /// TODO Should we implement a `NonEmptyHashSet`?
44 /// TODO Should we implement a `NonEmptyHashSet`?
44 Set(HashSet<&'a HgPath>),
45 Set(HashSet<&'a HgPath>),
45 /// Visit this directory and all subdirectories
46 /// Visit this directory and all subdirectories
46 Recursive,
47 Recursive,
47 }
48 }
48
49
49 pub trait Matcher {
50 pub trait Matcher {
50 /// Explicitly listed files
51 /// Explicitly listed files
51 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
52 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
52 /// Returns whether `filename` is in `file_set`
53 /// Returns whether `filename` is in `file_set`
53 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
54 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
54 /// Returns whether `filename` is matched by this matcher
55 /// Returns whether `filename` is matched by this matcher
55 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
56 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
56 /// Decides whether a directory should be visited based on whether it
57 /// Decides whether a directory should be visited based on whether it
57 /// has potential matches in it or one of its subdirectories, and
58 /// has potential matches in it or one of its subdirectories, and
58 /// potentially lists which subdirectories of that directory should be
59 /// potentially lists which subdirectories of that directory should be
59 /// visited. This is based on the match's primary, included, and excluded
60 /// visited. This is based on the match's primary, included, and excluded
60 /// patterns.
61 /// patterns.
61 ///
62 ///
62 /// # Example
63 /// # Example
63 ///
64 ///
64 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
65 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
65 /// return the following values (assuming the implementation of
66 /// return the following values (assuming the implementation of
66 /// visit_children_set is capable of recognizing this; some implementations
67 /// visit_children_set is capable of recognizing this; some implementations
67 /// are not).
68 /// are not).
68 ///
69 ///
69 /// ```text
70 /// ```text
70 /// ```ignore
71 /// ```ignore
71 /// '' -> {'foo', 'qux'}
72 /// '' -> {'foo', 'qux'}
72 /// 'baz' -> set()
73 /// 'baz' -> set()
73 /// 'foo' -> {'bar'}
74 /// 'foo' -> {'bar'}
74 /// // Ideally this would be `Recursive`, but since the prefix nature of
75 /// // Ideally this would be `Recursive`, but since the prefix nature of
75 /// // matchers is applied to the entire matcher, we have to downgrade this
76 /// // matchers is applied to the entire matcher, we have to downgrade this
76 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
77 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
77 /// // `RootFilesIn'-kind matcher being mixed in.
78 /// // `RootFilesIn'-kind matcher being mixed in.
78 /// 'foo/bar' -> 'this'
79 /// 'foo/bar' -> 'this'
79 /// 'qux' -> 'this'
80 /// 'qux' -> 'this'
80 /// ```
81 /// ```
81 /// # Important
82 /// # Important
82 ///
83 ///
83 /// Most matchers do not know if they're representing files or
84 /// Most matchers do not know if they're representing files or
84 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
85 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
85 /// file or a directory, so `visit_children_set('dir')` for most matchers
86 /// file or a directory, so `visit_children_set('dir')` for most matchers
86 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
87 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
87 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
88 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
88 /// it may return `VisitChildrenSet::This`.
89 /// it may return `VisitChildrenSet::This`.
89 /// Do not rely on the return being a `HashSet` indicating that there are
90 /// Do not rely on the return being a `HashSet` indicating that there are
90 /// no files in this dir to investigate (or equivalently that if there are
91 /// no files in this dir to investigate (or equivalently that if there are
91 /// files to investigate in 'dir' that it will always return
92 /// files to investigate in 'dir' that it will always return
92 /// `VisitChildrenSet::This`).
93 /// `VisitChildrenSet::This`).
93 fn visit_children_set(
94 fn visit_children_set(
94 &self,
95 &self,
95 directory: impl AsRef<HgPath>,
96 directory: impl AsRef<HgPath>,
96 ) -> VisitChildrenSet;
97 ) -> VisitChildrenSet;
97 /// Matcher will match everything and `files_set()` will be empty:
98 /// Matcher will match everything and `files_set()` will be empty:
98 /// optimization might be possible.
99 /// optimization might be possible.
99 fn matches_everything(&self) -> bool;
100 fn matches_everything(&self) -> bool;
100 /// Matcher will match exactly the files in `files_set()`: optimization
101 /// Matcher will match exactly the files in `files_set()`: optimization
101 /// might be possible.
102 /// might be possible.
102 fn is_exact(&self) -> bool;
103 fn is_exact(&self) -> bool;
103 }
104 }
104
105
105 /// Matches everything.
106 /// Matches everything.
106 ///```
107 ///```
107 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
108 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
108 ///
109 ///
109 /// let matcher = AlwaysMatcher;
110 /// let matcher = AlwaysMatcher;
110 ///
111 ///
111 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
113 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
114 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
114 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
115 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
115 /// ```
116 /// ```
116 #[derive(Debug)]
117 #[derive(Debug)]
117 pub struct AlwaysMatcher;
118 pub struct AlwaysMatcher;
118
119
119 impl Matcher for AlwaysMatcher {
120 impl Matcher for AlwaysMatcher {
120 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
121 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
121 None
122 None
122 }
123 }
123 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
124 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
124 false
125 false
125 }
126 }
126 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
127 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
127 true
128 true
128 }
129 }
129 fn visit_children_set(
130 fn visit_children_set(
130 &self,
131 &self,
131 _directory: impl AsRef<HgPath>,
132 _directory: impl AsRef<HgPath>,
132 ) -> VisitChildrenSet {
133 ) -> VisitChildrenSet {
133 VisitChildrenSet::Recursive
134 VisitChildrenSet::Recursive
134 }
135 }
135 fn matches_everything(&self) -> bool {
136 fn matches_everything(&self) -> bool {
136 true
137 true
137 }
138 }
138 fn is_exact(&self) -> bool {
139 fn is_exact(&self) -> bool {
139 false
140 false
140 }
141 }
141 }
142 }
142
143
143 /// Matches the input files exactly. They are interpreted as paths, not
144 /// Matches the input files exactly. They are interpreted as paths, not
144 /// patterns.
145 /// patterns.
145 ///
146 ///
146 ///```
147 ///```
147 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
148 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
148 ///
149 ///
149 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
150 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
150 /// let matcher = FileMatcher::new(&files).unwrap();
151 /// let matcher = FileMatcher::new(&files).unwrap();
151 ///
152 ///
152 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
153 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
153 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
154 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
154 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
155 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
155 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
156 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
156 /// ```
157 /// ```
157 #[derive(Debug)]
158 #[derive(Debug)]
158 pub struct FileMatcher<'a> {
159 pub struct FileMatcher<'a> {
159 files: HashSet<&'a HgPath>,
160 files: HashSet<&'a HgPath>,
160 dirs: DirsMultiset,
161 dirs: DirsMultiset,
161 }
162 }
162
163
163 impl<'a> FileMatcher<'a> {
164 impl<'a> FileMatcher<'a> {
164 pub fn new(
165 pub fn new(
165 files: &'a [impl AsRef<HgPath>],
166 files: &'a [impl AsRef<HgPath>],
166 ) -> Result<Self, DirstateMapError> {
167 ) -> Result<Self, DirstateMapError> {
167 Ok(Self {
168 Ok(Self {
168 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
169 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
169 dirs: DirsMultiset::from_manifest(files)?,
170 dirs: DirsMultiset::from_manifest(files)?,
170 })
171 })
171 }
172 }
172 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
173 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
173 self.files.contains(filename.as_ref())
174 self.files.contains(filename.as_ref())
174 }
175 }
175 }
176 }
176
177
177 impl<'a> Matcher for FileMatcher<'a> {
178 impl<'a> Matcher for FileMatcher<'a> {
178 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
179 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
179 Some(&self.files)
180 Some(&self.files)
180 }
181 }
181 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
182 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
182 self.inner_matches(filename)
183 self.inner_matches(filename)
183 }
184 }
184 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
185 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
185 self.inner_matches(filename)
186 self.inner_matches(filename)
186 }
187 }
187 fn visit_children_set(
188 fn visit_children_set(
188 &self,
189 &self,
189 directory: impl AsRef<HgPath>,
190 directory: impl AsRef<HgPath>,
190 ) -> VisitChildrenSet {
191 ) -> VisitChildrenSet {
191 if self.files.is_empty() || !self.dirs.contains(&directory) {
192 if self.files.is_empty() || !self.dirs.contains(&directory) {
192 return VisitChildrenSet::Empty;
193 return VisitChildrenSet::Empty;
193 }
194 }
194 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
195 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
195
196
196 let mut candidates: HashSet<&HgPath> =
197 let mut candidates: HashSet<&HgPath> =
197 self.files.union(&dirs_as_set).map(|k| *k).collect();
198 self.files.union(&dirs_as_set).map(|k| *k).collect();
198 candidates.remove(HgPath::new(b""));
199 candidates.remove(HgPath::new(b""));
199
200
200 if !directory.as_ref().is_empty() {
201 if !directory.as_ref().is_empty() {
201 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
202 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
202 candidates = candidates
203 candidates = candidates
203 .iter()
204 .iter()
204 .filter_map(|c| {
205 .filter_map(|c| {
205 if c.as_bytes().starts_with(&directory) {
206 if c.as_bytes().starts_with(&directory) {
206 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
207 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
207 } else {
208 } else {
208 None
209 None
209 }
210 }
210 })
211 })
211 .collect();
212 .collect();
212 }
213 }
213
214
214 // `self.dirs` includes all of the directories, recursively, so if
215 // `self.dirs` includes all of the directories, recursively, so if
215 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
216 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
216 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
217 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
217 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
218 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
218 // subdir will be in there without a slash.
219 // subdir will be in there without a slash.
219 VisitChildrenSet::Set(
220 VisitChildrenSet::Set(
220 candidates
221 candidates
221 .iter()
222 .iter()
222 .filter_map(|c| {
223 .filter_map(|c| {
223 if c.bytes().all(|b| *b != b'/') {
224 if c.bytes().all(|b| *b != b'/') {
224 Some(*c)
225 Some(*c)
225 } else {
226 } else {
226 None
227 None
227 }
228 }
228 })
229 })
229 .collect(),
230 .collect(),
230 )
231 )
231 }
232 }
232 fn matches_everything(&self) -> bool {
233 fn matches_everything(&self) -> bool {
233 false
234 false
234 }
235 }
235 fn is_exact(&self) -> bool {
236 fn is_exact(&self) -> bool {
236 true
237 true
237 }
238 }
238 }
239 }
239
240
240 /// Matches files that are included in the ignore rules.
241 /// Matches files that are included in the ignore rules.
241 #[cfg_attr(
242 #[cfg_attr(
242 feature = "with-re2",
243 feature = "with-re2",
243 doc = r##"
244 doc = r##"
244 ```
245 ```
245 use hg::{
246 use hg::{
246 matchers::{IncludeMatcher, Matcher},
247 matchers::{IncludeMatcher, Matcher},
247 IgnorePattern,
248 IgnorePattern,
248 PatternSyntax,
249 PatternSyntax,
249 utils::hg_path::HgPath
250 utils::hg_path::HgPath
250 };
251 };
251 use std::path::Path;
252 use std::path::Path;
252 ///
253 ///
253 let ignore_patterns =
254 let ignore_patterns =
254 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
255 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
255 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
256 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
256 ///
257 ///
257 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
258 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
258 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
259 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
259 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
260 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
260 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
261 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
261 ```
262 ```
262 "##
263 "##
263 )]
264 )]
264 pub struct IncludeMatcher<'a> {
265 pub struct IncludeMatcher<'a> {
265 patterns: Vec<u8>,
266 patterns: Vec<u8>,
266 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
267 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
267 /// Whether all the patterns match a prefix (i.e. recursively)
268 /// Whether all the patterns match a prefix (i.e. recursively)
268 prefix: bool,
269 prefix: bool,
269 roots: HashSet<HgPathBuf>,
270 roots: HashSet<HgPathBuf>,
270 dirs: HashSet<HgPathBuf>,
271 dirs: HashSet<HgPathBuf>,
271 parents: HashSet<HgPathBuf>,
272 parents: HashSet<HgPathBuf>,
272 }
273 }
273
274
274 impl<'a> Matcher for IncludeMatcher<'a> {
275 impl<'a> Matcher for IncludeMatcher<'a> {
275 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
276 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
276 None
277 None
277 }
278 }
278
279
279 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
280 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
280 false
281 false
281 }
282 }
282
283
283 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
284 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
284 (self.match_fn)(filename.as_ref())
285 (self.match_fn)(filename.as_ref())
285 }
286 }
286
287
287 fn visit_children_set(
288 fn visit_children_set(
288 &self,
289 &self,
289 directory: impl AsRef<HgPath>,
290 directory: impl AsRef<HgPath>,
290 ) -> VisitChildrenSet {
291 ) -> VisitChildrenSet {
291 let dir = directory.as_ref();
292 let dir = directory.as_ref();
292 if self.prefix && self.roots.contains(dir) {
293 if self.prefix && self.roots.contains(dir) {
293 return VisitChildrenSet::Recursive;
294 return VisitChildrenSet::Recursive;
294 }
295 }
295 if self.roots.contains(HgPath::new(b""))
296 if self.roots.contains(HgPath::new(b""))
296 || self.roots.contains(dir)
297 || self.roots.contains(dir)
297 || self.dirs.contains(dir)
298 || self.dirs.contains(dir)
298 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
299 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
299 {
300 {
300 return VisitChildrenSet::This;
301 return VisitChildrenSet::This;
301 }
302 }
302
303
303 if self.parents.contains(directory.as_ref()) {
304 if self.parents.contains(directory.as_ref()) {
304 let multiset = self.get_all_parents_children();
305 let multiset = self.get_all_parents_children();
305 if let Some(children) = multiset.get(dir) {
306 if let Some(children) = multiset.get(dir) {
306 return VisitChildrenSet::Set(children.to_owned());
307 return VisitChildrenSet::Set(children.to_owned());
307 }
308 }
308 }
309 }
309 VisitChildrenSet::Empty
310 VisitChildrenSet::Empty
310 }
311 }
311
312
312 fn matches_everything(&self) -> bool {
313 fn matches_everything(&self) -> bool {
313 false
314 false
314 }
315 }
315
316
316 fn is_exact(&self) -> bool {
317 fn is_exact(&self) -> bool {
317 false
318 false
318 }
319 }
319 }
320 }
320
321
321 #[cfg(feature = "with-re2")]
322 #[cfg(feature = "with-re2")]
322 /// Returns a function that matches an `HgPath` against the given regex
323 /// Returns a function that matches an `HgPath` against the given regex
323 /// pattern.
324 /// pattern.
324 ///
325 ///
325 /// This can fail when the pattern is invalid or not supported by the
326 /// This can fail when the pattern is invalid or not supported by the
326 /// underlying engine `Re2`, for instance anything with back-references.
327 /// underlying engine `Re2`, for instance anything with back-references.
327 #[timed]
328 #[timed]
328 fn re_matcher(
329 fn re_matcher(
329 pattern: &[u8],
330 pattern: &[u8],
330 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
331 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
331 let regex = Re2::new(pattern);
332 let regex = Re2::new(pattern);
332 let regex = regex.map_err(|e| PatternError::UnsupportedSyntax(e))?;
333 let regex = regex.map_err(|e| PatternError::UnsupportedSyntax(e))?;
333 Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
334 Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
334 }
335 }
335
336
336 #[cfg(not(feature = "with-re2"))]
337 #[cfg(not(feature = "with-re2"))]
337 /// Returns a function that matches an `HgPath` against the given regex
338 /// Returns a function that matches an `HgPath` against the given regex
338 /// pattern.
339 /// pattern.
339 ///
340 ///
340 /// This can fail when the pattern is invalid or not supported by the
341 /// This can fail when the pattern is invalid or not supported by the
341 /// underlying engine (the `regex` crate), for instance anything with
342 /// underlying engine (the `regex` crate), for instance anything with
342 /// back-references.
343 /// back-references.
343 #[timed]
344 #[timed]
344 fn re_matcher(
345 fn re_matcher(
345 pattern: &[u8],
346 pattern: &[u8],
346 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
347 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
347 use std::io::Write;
348 use std::io::Write;
348
349
349 let mut escaped_bytes = vec![];
350 let mut escaped_bytes = vec![];
350 for byte in pattern {
351 for byte in pattern {
351 if *byte > 127 {
352 if *byte > 127 {
352 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
353 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
353 } else {
354 } else {
354 escaped_bytes.push(*byte);
355 escaped_bytes.push(*byte);
355 }
356 }
356 }
357 }
357
358
358 // Avoid the cost of UTF8 checking
359 // Avoid the cost of UTF8 checking
359 //
360 //
360 // # Safety
361 // # Safety
361 // This is safe because we escaped all non-ASCII bytes.
362 // This is safe because we escaped all non-ASCII bytes.
362 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
363 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
363 let re = regex::bytes::RegexBuilder::new(&pattern_string)
364 let re = regex::bytes::RegexBuilder::new(&pattern_string)
364 .unicode(false)
365 .unicode(false)
365 .build()
366 .build()
366 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
367 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
367
368
368 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
369 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
369 }
370 }
370
371
371 /// Returns the regex pattern and a function that matches an `HgPath` against
372 /// Returns the regex pattern and a function that matches an `HgPath` against
372 /// said regex formed by the given ignore patterns.
373 /// said regex formed by the given ignore patterns.
373 fn build_regex_match<'a>(
374 fn build_regex_match<'a>(
374 ignore_patterns: &'a [&'a IgnorePattern],
375 ignore_patterns: &'a [&'a IgnorePattern],
375 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
376 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
376 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
377 let mut regexps = vec![];
377 .into_iter()
378 let mut exact_set = HashSet::new();
378 .map(|k| build_single_regex(*k))
379
379 .collect();
380 for pattern in ignore_patterns {
380 let regexps = regexps?;
381 if let Some(re) = build_single_regex(pattern)? {
382 regexps.push(re);
383 } else {
384 let exact = normalize_path_bytes(&pattern.pattern);
385 exact_set.insert(HgPathBuf::from_bytes(&exact));
386 }
387 }
388
381 let full_regex = regexps.join(&b'|');
389 let full_regex = regexps.join(&b'|');
382
390
391 // An empty pattern would cause the regex engine to incorrectly match the
392 // (empty) root directory
393 let func = if !(regexps.is_empty()) {
383 let matcher = re_matcher(&full_regex)?;
394 let matcher = re_matcher(&full_regex)?;
384 let func = Box::new(move |filename: &HgPath| matcher(filename));
395 let func = move |filename: &HgPath| {
396 exact_set.contains(filename) || matcher(filename)
397 };
398 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
399 } else {
400 let func = move |filename: &HgPath| exact_set.contains(filename);
401 Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
402 };
385
403
386 Ok((full_regex, func))
404 Ok((full_regex, func))
387 }
405 }
388
406
389 /// Returns roots and directories corresponding to each pattern.
407 /// Returns roots and directories corresponding to each pattern.
390 ///
408 ///
391 /// This calculates the roots and directories exactly matching the patterns and
409 /// This calculates the roots and directories exactly matching the patterns and
392 /// returns a tuple of (roots, dirs). It does not return other directories
410 /// returns a tuple of (roots, dirs). It does not return other directories
393 /// which may also need to be considered, like the parent directories.
411 /// which may also need to be considered, like the parent directories.
394 fn roots_and_dirs(
412 fn roots_and_dirs(
395 ignore_patterns: &[IgnorePattern],
413 ignore_patterns: &[IgnorePattern],
396 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
414 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
397 let mut roots = Vec::new();
415 let mut roots = Vec::new();
398 let mut dirs = Vec::new();
416 let mut dirs = Vec::new();
399
417
400 for ignore_pattern in ignore_patterns {
418 for ignore_pattern in ignore_patterns {
401 let IgnorePattern {
419 let IgnorePattern {
402 syntax, pattern, ..
420 syntax, pattern, ..
403 } = ignore_pattern;
421 } = ignore_pattern;
404 match syntax {
422 match syntax {
405 PatternSyntax::RootGlob | PatternSyntax::Glob => {
423 PatternSyntax::RootGlob | PatternSyntax::Glob => {
406 let mut root = vec![];
424 let mut root = vec![];
407
425
408 for p in pattern.split(|c| *c == b'/') {
426 for p in pattern.split(|c| *c == b'/') {
409 if p.iter().any(|c| match *c {
427 if p.iter().any(|c| match *c {
410 b'[' | b'{' | b'*' | b'?' => true,
428 b'[' | b'{' | b'*' | b'?' => true,
411 _ => false,
429 _ => false,
412 }) {
430 }) {
413 break;
431 break;
414 }
432 }
415 root.push(HgPathBuf::from_bytes(p));
433 root.push(HgPathBuf::from_bytes(p));
416 }
434 }
417 let buf =
435 let buf =
418 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
436 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
419 roots.push(buf);
437 roots.push(buf);
420 }
438 }
421 PatternSyntax::Path | PatternSyntax::RelPath => {
439 PatternSyntax::Path | PatternSyntax::RelPath => {
422 let pat = HgPath::new(if pattern == b"." {
440 let pat = HgPath::new(if pattern == b"." {
423 &[] as &[u8]
441 &[] as &[u8]
424 } else {
442 } else {
425 pattern
443 pattern
426 });
444 });
427 roots.push(pat.to_owned());
445 roots.push(pat.to_owned());
428 }
446 }
429 PatternSyntax::RootFiles => {
447 PatternSyntax::RootFiles => {
430 let pat = if pattern == b"." {
448 let pat = if pattern == b"." {
431 &[] as &[u8]
449 &[] as &[u8]
432 } else {
450 } else {
433 pattern
451 pattern
434 };
452 };
435 dirs.push(HgPathBuf::from_bytes(pat));
453 dirs.push(HgPathBuf::from_bytes(pat));
436 }
454 }
437 _ => {
455 _ => {
438 roots.push(HgPathBuf::new());
456 roots.push(HgPathBuf::new());
439 }
457 }
440 }
458 }
441 }
459 }
442 (roots, dirs)
460 (roots, dirs)
443 }
461 }
444
462
445 /// Paths extracted from patterns
463 /// Paths extracted from patterns
446 #[derive(Debug, PartialEq)]
464 #[derive(Debug, PartialEq)]
447 struct RootsDirsAndParents {
465 struct RootsDirsAndParents {
448 /// Directories to match recursively
466 /// Directories to match recursively
449 pub roots: HashSet<HgPathBuf>,
467 pub roots: HashSet<HgPathBuf>,
450 /// Directories to match non-recursively
468 /// Directories to match non-recursively
451 pub dirs: HashSet<HgPathBuf>,
469 pub dirs: HashSet<HgPathBuf>,
452 /// Implicitly required directories to go to items in either roots or dirs
470 /// Implicitly required directories to go to items in either roots or dirs
453 pub parents: HashSet<HgPathBuf>,
471 pub parents: HashSet<HgPathBuf>,
454 }
472 }
455
473
456 /// Extract roots, dirs and parents from patterns.
474 /// Extract roots, dirs and parents from patterns.
457 fn roots_dirs_and_parents(
475 fn roots_dirs_and_parents(
458 ignore_patterns: &[IgnorePattern],
476 ignore_patterns: &[IgnorePattern],
459 ) -> PatternResult<RootsDirsAndParents> {
477 ) -> PatternResult<RootsDirsAndParents> {
460 let (roots, dirs) = roots_and_dirs(ignore_patterns);
478 let (roots, dirs) = roots_and_dirs(ignore_patterns);
461
479
462 let mut parents = HashSet::new();
480 let mut parents = HashSet::new();
463
481
464 parents.extend(
482 parents.extend(
465 DirsMultiset::from_manifest(&dirs)
483 DirsMultiset::from_manifest(&dirs)
466 .map_err(|e| match e {
484 .map_err(|e| match e {
467 DirstateMapError::InvalidPath(e) => e,
485 DirstateMapError::InvalidPath(e) => e,
468 _ => unreachable!(),
486 _ => unreachable!(),
469 })?
487 })?
470 .iter()
488 .iter()
471 .map(|k| k.to_owned()),
489 .map(|k| k.to_owned()),
472 );
490 );
473 parents.extend(
491 parents.extend(
474 DirsMultiset::from_manifest(&roots)
492 DirsMultiset::from_manifest(&roots)
475 .map_err(|e| match e {
493 .map_err(|e| match e {
476 DirstateMapError::InvalidPath(e) => e,
494 DirstateMapError::InvalidPath(e) => e,
477 _ => unreachable!(),
495 _ => unreachable!(),
478 })?
496 })?
479 .iter()
497 .iter()
480 .map(|k| k.to_owned()),
498 .map(|k| k.to_owned()),
481 );
499 );
482
500
483 Ok(RootsDirsAndParents {
501 Ok(RootsDirsAndParents {
484 roots: HashSet::from_iter(roots),
502 roots: HashSet::from_iter(roots),
485 dirs: HashSet::from_iter(dirs),
503 dirs: HashSet::from_iter(dirs),
486 parents,
504 parents,
487 })
505 })
488 }
506 }
489
507
490 /// Returns a function that checks whether a given file (in the general sense)
508 /// Returns a function that checks whether a given file (in the general sense)
491 /// should be matched.
509 /// should be matched.
492 fn build_match<'a, 'b>(
510 fn build_match<'a, 'b>(
493 ignore_patterns: &'a [IgnorePattern],
511 ignore_patterns: &'a [IgnorePattern],
494 root_dir: impl AsRef<Path>,
512 root_dir: impl AsRef<Path>,
495 ) -> PatternResult<(
513 ) -> PatternResult<(
496 Vec<u8>,
514 Vec<u8>,
497 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
515 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
498 Vec<PatternFileWarning>,
516 Vec<PatternFileWarning>,
499 )> {
517 )> {
500 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
518 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
501 // For debugging and printing
519 // For debugging and printing
502 let mut patterns = vec![];
520 let mut patterns = vec![];
503 let mut all_warnings = vec![];
521 let mut all_warnings = vec![];
504
522
505 let (subincludes, ignore_patterns) =
523 let (subincludes, ignore_patterns) =
506 filter_subincludes(ignore_patterns, root_dir)?;
524 filter_subincludes(ignore_patterns, root_dir)?;
507
525
508 if !subincludes.is_empty() {
526 if !subincludes.is_empty() {
509 // Build prefix-based matcher functions for subincludes
527 // Build prefix-based matcher functions for subincludes
510 let mut submatchers = FastHashMap::default();
528 let mut submatchers = FastHashMap::default();
511 let mut prefixes = vec![];
529 let mut prefixes = vec![];
512
530
513 for SubInclude { prefix, root, path } in subincludes.into_iter() {
531 for SubInclude { prefix, root, path } in subincludes.into_iter() {
514 let (match_fn, warnings) =
532 let (match_fn, warnings) =
515 get_ignore_function(vec![path.to_path_buf()], root)?;
533 get_ignore_function(vec![path.to_path_buf()], root)?;
516 all_warnings.extend(warnings);
534 all_warnings.extend(warnings);
517 prefixes.push(prefix.to_owned());
535 prefixes.push(prefix.to_owned());
518 submatchers.insert(prefix.to_owned(), match_fn);
536 submatchers.insert(prefix.to_owned(), match_fn);
519 }
537 }
520
538
521 let match_subinclude = move |filename: &HgPath| {
539 let match_subinclude = move |filename: &HgPath| {
522 for prefix in prefixes.iter() {
540 for prefix in prefixes.iter() {
523 if let Some(rel) = filename.relative_to(prefix) {
541 if let Some(rel) = filename.relative_to(prefix) {
524 if (submatchers.get(prefix).unwrap())(rel) {
542 if (submatchers.get(prefix).unwrap())(rel) {
525 return true;
543 return true;
526 }
544 }
527 }
545 }
528 }
546 }
529 false
547 false
530 };
548 };
531
549
532 match_funcs.push(Box::new(match_subinclude));
550 match_funcs.push(Box::new(match_subinclude));
533 }
551 }
534
552
535 if !ignore_patterns.is_empty() {
553 if !ignore_patterns.is_empty() {
536 // Either do dumb matching if all patterns are rootfiles, or match
554 // Either do dumb matching if all patterns are rootfiles, or match
537 // with a regex.
555 // with a regex.
538 if ignore_patterns
556 if ignore_patterns
539 .iter()
557 .iter()
540 .all(|k| k.syntax == PatternSyntax::RootFiles)
558 .all(|k| k.syntax == PatternSyntax::RootFiles)
541 {
559 {
542 let dirs: HashSet<_> = ignore_patterns
560 let dirs: HashSet<_> = ignore_patterns
543 .iter()
561 .iter()
544 .map(|k| k.pattern.to_owned())
562 .map(|k| k.pattern.to_owned())
545 .collect();
563 .collect();
546 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
564 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
547
565
548 let match_func = move |path: &HgPath| -> bool {
566 let match_func = move |path: &HgPath| -> bool {
549 let path = path.as_bytes();
567 let path = path.as_bytes();
550 let i = path.iter().rfind(|a| **a == b'/');
568 let i = path.iter().rfind(|a| **a == b'/');
551 let dir = if let Some(i) = i {
569 let dir = if let Some(i) = i {
552 &path[..*i as usize]
570 &path[..*i as usize]
553 } else {
571 } else {
554 b"."
572 b"."
555 };
573 };
556 dirs.contains(dir.deref())
574 dirs.contains(dir.deref())
557 };
575 };
558 match_funcs.push(Box::new(match_func));
576 match_funcs.push(Box::new(match_func));
559
577
560 patterns.extend(b"rootfilesin: ");
578 patterns.extend(b"rootfilesin: ");
561 dirs_vec.sort();
579 dirs_vec.sort();
562 patterns.extend(dirs_vec.escaped_bytes());
580 patterns.extend(dirs_vec.escaped_bytes());
563 } else {
581 } else {
564 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
582 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
565 patterns = new_re;
583 patterns = new_re;
566 match_funcs.push(match_func)
584 match_funcs.push(match_func)
567 }
585 }
568 }
586 }
569
587
570 Ok(if match_funcs.len() == 1 {
588 Ok(if match_funcs.len() == 1 {
571 (patterns, match_funcs.remove(0), all_warnings)
589 (patterns, match_funcs.remove(0), all_warnings)
572 } else {
590 } else {
573 (
591 (
574 patterns,
592 patterns,
575 Box::new(move |f: &HgPath| -> bool {
593 Box::new(move |f: &HgPath| -> bool {
576 match_funcs.iter().any(|match_func| match_func(f))
594 match_funcs.iter().any(|match_func| match_func(f))
577 }),
595 }),
578 all_warnings,
596 all_warnings,
579 )
597 )
580 })
598 })
581 }
599 }
582
600
583 /// Parses all "ignore" files with their recursive includes and returns a
601 /// Parses all "ignore" files with their recursive includes and returns a
584 /// function that checks whether a given file (in the general sense) should be
602 /// function that checks whether a given file (in the general sense) should be
585 /// ignored.
603 /// ignored.
586 pub fn get_ignore_function<'a>(
604 pub fn get_ignore_function<'a>(
587 all_pattern_files: Vec<PathBuf>,
605 all_pattern_files: Vec<PathBuf>,
588 root_dir: impl AsRef<Path>,
606 root_dir: impl AsRef<Path>,
589 ) -> PatternResult<(
607 ) -> PatternResult<(
590 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
608 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
591 Vec<PatternFileWarning>,
609 Vec<PatternFileWarning>,
592 )> {
610 )> {
593 let mut all_patterns = vec![];
611 let mut all_patterns = vec![];
594 let mut all_warnings = vec![];
612 let mut all_warnings = vec![];
595
613
596 for pattern_file in all_pattern_files.into_iter() {
614 for pattern_file in all_pattern_files.into_iter() {
597 let (patterns, warnings) =
615 let (patterns, warnings) =
598 get_patterns_from_file(pattern_file, &root_dir)?;
616 get_patterns_from_file(pattern_file, &root_dir)?;
599
617
600 all_patterns.extend(patterns.to_owned());
618 all_patterns.extend(patterns.to_owned());
601 all_warnings.extend(warnings);
619 all_warnings.extend(warnings);
602 }
620 }
603 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
621 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
604 all_warnings.extend(warnings);
622 all_warnings.extend(warnings);
605 Ok((
623 Ok((
606 Box::new(move |path: &HgPath| matcher.matches(path)),
624 Box::new(move |path: &HgPath| matcher.matches(path)),
607 all_warnings,
625 all_warnings,
608 ))
626 ))
609 }
627 }
610
628
611 impl<'a> IncludeMatcher<'a> {
629 impl<'a> IncludeMatcher<'a> {
612 pub fn new(
630 pub fn new(
613 ignore_patterns: Vec<IgnorePattern>,
631 ignore_patterns: Vec<IgnorePattern>,
614 root_dir: impl AsRef<Path>,
632 root_dir: impl AsRef<Path>,
615 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
633 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
616 let (patterns, match_fn, warnings) =
634 let (patterns, match_fn, warnings) =
617 build_match(&ignore_patterns, root_dir)?;
635 build_match(&ignore_patterns, root_dir)?;
618 let RootsDirsAndParents {
636 let RootsDirsAndParents {
619 roots,
637 roots,
620 dirs,
638 dirs,
621 parents,
639 parents,
622 } = roots_dirs_and_parents(&ignore_patterns)?;
640 } = roots_dirs_and_parents(&ignore_patterns)?;
623
641
624 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
642 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
625 PatternSyntax::Path | PatternSyntax::RelPath => true,
643 PatternSyntax::Path | PatternSyntax::RelPath => true,
626 _ => false,
644 _ => false,
627 });
645 });
628
646
629 Ok((
647 Ok((
630 Self {
648 Self {
631 patterns,
649 patterns,
632 match_fn,
650 match_fn,
633 prefix,
651 prefix,
634 roots,
652 roots,
635 dirs,
653 dirs,
636 parents,
654 parents,
637 },
655 },
638 warnings,
656 warnings,
639 ))
657 ))
640 }
658 }
641
659
642 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
660 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
643 // TODO cache
661 // TODO cache
644 let thing = self
662 let thing = self
645 .dirs
663 .dirs
646 .iter()
664 .iter()
647 .chain(self.roots.iter())
665 .chain(self.roots.iter())
648 .chain(self.parents.iter());
666 .chain(self.parents.iter());
649 DirsChildrenMultiset::new(thing, Some(&self.parents))
667 DirsChildrenMultiset::new(thing, Some(&self.parents))
650 }
668 }
651 }
669 }
652
670
653 impl<'a> Display for IncludeMatcher<'a> {
671 impl<'a> Display for IncludeMatcher<'a> {
654 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
672 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
655 write!(
673 write!(
656 f,
674 f,
657 "IncludeMatcher(includes='{}')",
675 "IncludeMatcher(includes='{}')",
658 String::from_utf8_lossy(&self.patterns.escaped_bytes())
676 String::from_utf8_lossy(&self.patterns.escaped_bytes())
659 )
677 )
660 }
678 }
661 }
679 }
662
680
663 #[cfg(test)]
681 #[cfg(test)]
664 mod tests {
682 mod tests {
665 use super::*;
683 use super::*;
666 use pretty_assertions::assert_eq;
684 use pretty_assertions::assert_eq;
667 use std::path::Path;
685 use std::path::Path;
668
686
669 #[test]
687 #[test]
670 fn test_roots_and_dirs() {
688 fn test_roots_and_dirs() {
671 let pats = vec![
689 let pats = vec![
672 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
690 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
673 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
691 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
674 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
692 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
675 ];
693 ];
676 let (roots, dirs) = roots_and_dirs(&pats);
694 let (roots, dirs) = roots_and_dirs(&pats);
677
695
678 assert_eq!(
696 assert_eq!(
679 roots,
697 roots,
680 vec!(
698 vec!(
681 HgPathBuf::from_bytes(b"g/h"),
699 HgPathBuf::from_bytes(b"g/h"),
682 HgPathBuf::from_bytes(b"g/h"),
700 HgPathBuf::from_bytes(b"g/h"),
683 HgPathBuf::new()
701 HgPathBuf::new()
684 ),
702 ),
685 );
703 );
686 assert_eq!(dirs, vec!());
704 assert_eq!(dirs, vec!());
687 }
705 }
688
706
689 #[test]
707 #[test]
690 fn test_roots_dirs_and_parents() {
708 fn test_roots_dirs_and_parents() {
691 let pats = vec![
709 let pats = vec![
692 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
710 IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
693 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
711 IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
694 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
712 IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
695 ];
713 ];
696
714
697 let mut roots = HashSet::new();
715 let mut roots = HashSet::new();
698 roots.insert(HgPathBuf::from_bytes(b"g/h"));
716 roots.insert(HgPathBuf::from_bytes(b"g/h"));
699 roots.insert(HgPathBuf::new());
717 roots.insert(HgPathBuf::new());
700
718
701 let dirs = HashSet::new();
719 let dirs = HashSet::new();
702
720
703 let mut parents = HashSet::new();
721 let mut parents = HashSet::new();
704 parents.insert(HgPathBuf::new());
722 parents.insert(HgPathBuf::new());
705 parents.insert(HgPathBuf::from_bytes(b"g"));
723 parents.insert(HgPathBuf::from_bytes(b"g"));
706
724
707 assert_eq!(
725 assert_eq!(
708 roots_dirs_and_parents(&pats).unwrap(),
726 roots_dirs_and_parents(&pats).unwrap(),
709 RootsDirsAndParents {
727 RootsDirsAndParents {
710 roots,
728 roots,
711 dirs,
729 dirs,
712 parents
730 parents
713 }
731 }
714 );
732 );
715 }
733 }
716
734
717 #[test]
735 #[test]
718 fn test_filematcher_visit_children_set() {
736 fn test_filematcher_visit_children_set() {
719 // Visitchildrenset
737 // Visitchildrenset
720 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
738 let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
721 let matcher = FileMatcher::new(&files).unwrap();
739 let matcher = FileMatcher::new(&files).unwrap();
722
740
723 let mut set = HashSet::new();
741 let mut set = HashSet::new();
724 set.insert(HgPath::new(b"dir"));
742 set.insert(HgPath::new(b"dir"));
725 assert_eq!(
743 assert_eq!(
726 matcher.visit_children_set(HgPath::new(b"")),
744 matcher.visit_children_set(HgPath::new(b"")),
727 VisitChildrenSet::Set(set)
745 VisitChildrenSet::Set(set)
728 );
746 );
729
747
730 let mut set = HashSet::new();
748 let mut set = HashSet::new();
731 set.insert(HgPath::new(b"subdir"));
749 set.insert(HgPath::new(b"subdir"));
732 assert_eq!(
750 assert_eq!(
733 matcher.visit_children_set(HgPath::new(b"dir")),
751 matcher.visit_children_set(HgPath::new(b"dir")),
734 VisitChildrenSet::Set(set)
752 VisitChildrenSet::Set(set)
735 );
753 );
736
754
737 let mut set = HashSet::new();
755 let mut set = HashSet::new();
738 set.insert(HgPath::new(b"foo.txt"));
756 set.insert(HgPath::new(b"foo.txt"));
739 assert_eq!(
757 assert_eq!(
740 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
758 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
741 VisitChildrenSet::Set(set)
759 VisitChildrenSet::Set(set)
742 );
760 );
743
761
744 assert_eq!(
762 assert_eq!(
745 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
763 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
746 VisitChildrenSet::Empty
764 VisitChildrenSet::Empty
747 );
765 );
748 assert_eq!(
766 assert_eq!(
749 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
767 matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
750 VisitChildrenSet::Empty
768 VisitChildrenSet::Empty
751 );
769 );
752 assert_eq!(
770 assert_eq!(
753 matcher.visit_children_set(HgPath::new(b"folder")),
771 matcher.visit_children_set(HgPath::new(b"folder")),
754 VisitChildrenSet::Empty
772 VisitChildrenSet::Empty
755 );
773 );
756 }
774 }
757
775
758 #[test]
776 #[test]
759 fn test_filematcher_visit_children_set_files_and_dirs() {
777 fn test_filematcher_visit_children_set_files_and_dirs() {
760 let files = vec![
778 let files = vec![
761 HgPath::new(b"rootfile.txt"),
779 HgPath::new(b"rootfile.txt"),
762 HgPath::new(b"a/file1.txt"),
780 HgPath::new(b"a/file1.txt"),
763 HgPath::new(b"a/b/file2.txt"),
781 HgPath::new(b"a/b/file2.txt"),
764 // No file in a/b/c
782 // No file in a/b/c
765 HgPath::new(b"a/b/c/d/file4.txt"),
783 HgPath::new(b"a/b/c/d/file4.txt"),
766 ];
784 ];
767 let matcher = FileMatcher::new(&files).unwrap();
785 let matcher = FileMatcher::new(&files).unwrap();
768
786
769 let mut set = HashSet::new();
787 let mut set = HashSet::new();
770 set.insert(HgPath::new(b"a"));
788 set.insert(HgPath::new(b"a"));
771 set.insert(HgPath::new(b"rootfile.txt"));
789 set.insert(HgPath::new(b"rootfile.txt"));
772 assert_eq!(
790 assert_eq!(
773 matcher.visit_children_set(HgPath::new(b"")),
791 matcher.visit_children_set(HgPath::new(b"")),
774 VisitChildrenSet::Set(set)
792 VisitChildrenSet::Set(set)
775 );
793 );
776
794
777 let mut set = HashSet::new();
795 let mut set = HashSet::new();
778 set.insert(HgPath::new(b"b"));
796 set.insert(HgPath::new(b"b"));
779 set.insert(HgPath::new(b"file1.txt"));
797 set.insert(HgPath::new(b"file1.txt"));
780 assert_eq!(
798 assert_eq!(
781 matcher.visit_children_set(HgPath::new(b"a")),
799 matcher.visit_children_set(HgPath::new(b"a")),
782 VisitChildrenSet::Set(set)
800 VisitChildrenSet::Set(set)
783 );
801 );
784
802
785 let mut set = HashSet::new();
803 let mut set = HashSet::new();
786 set.insert(HgPath::new(b"c"));
804 set.insert(HgPath::new(b"c"));
787 set.insert(HgPath::new(b"file2.txt"));
805 set.insert(HgPath::new(b"file2.txt"));
788 assert_eq!(
806 assert_eq!(
789 matcher.visit_children_set(HgPath::new(b"a/b")),
807 matcher.visit_children_set(HgPath::new(b"a/b")),
790 VisitChildrenSet::Set(set)
808 VisitChildrenSet::Set(set)
791 );
809 );
792
810
793 let mut set = HashSet::new();
811 let mut set = HashSet::new();
794 set.insert(HgPath::new(b"d"));
812 set.insert(HgPath::new(b"d"));
795 assert_eq!(
813 assert_eq!(
796 matcher.visit_children_set(HgPath::new(b"a/b/c")),
814 matcher.visit_children_set(HgPath::new(b"a/b/c")),
797 VisitChildrenSet::Set(set)
815 VisitChildrenSet::Set(set)
798 );
816 );
799 let mut set = HashSet::new();
817 let mut set = HashSet::new();
800 set.insert(HgPath::new(b"file4.txt"));
818 set.insert(HgPath::new(b"file4.txt"));
801 assert_eq!(
819 assert_eq!(
802 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
820 matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
803 VisitChildrenSet::Set(set)
821 VisitChildrenSet::Set(set)
804 );
822 );
805
823
806 assert_eq!(
824 assert_eq!(
807 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
825 matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
808 VisitChildrenSet::Empty
826 VisitChildrenSet::Empty
809 );
827 );
810 assert_eq!(
828 assert_eq!(
811 matcher.visit_children_set(HgPath::new(b"folder")),
829 matcher.visit_children_set(HgPath::new(b"folder")),
812 VisitChildrenSet::Empty
830 VisitChildrenSet::Empty
813 );
831 );
814 }
832 }
815
833
816 #[cfg(feature = "with-re2")]
834 #[cfg(feature = "with-re2")]
817 #[test]
835 #[test]
818 fn test_includematcher() {
836 fn test_includematcher() {
819 // VisitchildrensetPrefix
837 // VisitchildrensetPrefix
820 let (matcher, _) = IncludeMatcher::new(
838 let (matcher, _) = IncludeMatcher::new(
821 vec![IgnorePattern::new(
839 vec![IgnorePattern::new(
822 PatternSyntax::RelPath,
840 PatternSyntax::RelPath,
823 b"dir/subdir",
841 b"dir/subdir",
824 Path::new(""),
842 Path::new(""),
825 )],
843 )],
826 "",
844 "",
827 )
845 )
828 .unwrap();
846 .unwrap();
829
847
830 let mut set = HashSet::new();
848 let mut set = HashSet::new();
831 set.insert(HgPath::new(b"dir"));
849 set.insert(HgPath::new(b"dir"));
832 assert_eq!(
850 assert_eq!(
833 matcher.visit_children_set(HgPath::new(b"")),
851 matcher.visit_children_set(HgPath::new(b"")),
834 VisitChildrenSet::Set(set)
852 VisitChildrenSet::Set(set)
835 );
853 );
836
854
837 let mut set = HashSet::new();
855 let mut set = HashSet::new();
838 set.insert(HgPath::new(b"subdir"));
856 set.insert(HgPath::new(b"subdir"));
839 assert_eq!(
857 assert_eq!(
840 matcher.visit_children_set(HgPath::new(b"dir")),
858 matcher.visit_children_set(HgPath::new(b"dir")),
841 VisitChildrenSet::Set(set)
859 VisitChildrenSet::Set(set)
842 );
860 );
843 assert_eq!(
861 assert_eq!(
844 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
862 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
845 VisitChildrenSet::Recursive
863 VisitChildrenSet::Recursive
846 );
864 );
847 // OPT: This should probably be 'all' if its parent is?
865 // OPT: This should probably be 'all' if its parent is?
848 assert_eq!(
866 assert_eq!(
849 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
867 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
850 VisitChildrenSet::This
868 VisitChildrenSet::This
851 );
869 );
852 assert_eq!(
870 assert_eq!(
853 matcher.visit_children_set(HgPath::new(b"folder")),
871 matcher.visit_children_set(HgPath::new(b"folder")),
854 VisitChildrenSet::Empty
872 VisitChildrenSet::Empty
855 );
873 );
856
874
857 // VisitchildrensetRootfilesin
875 // VisitchildrensetRootfilesin
858 let (matcher, _) = IncludeMatcher::new(
876 let (matcher, _) = IncludeMatcher::new(
859 vec![IgnorePattern::new(
877 vec![IgnorePattern::new(
860 PatternSyntax::RootFiles,
878 PatternSyntax::RootFiles,
861 b"dir/subdir",
879 b"dir/subdir",
862 Path::new(""),
880 Path::new(""),
863 )],
881 )],
864 "",
882 "",
865 )
883 )
866 .unwrap();
884 .unwrap();
867
885
868 let mut set = HashSet::new();
886 let mut set = HashSet::new();
869 set.insert(HgPath::new(b"dir"));
887 set.insert(HgPath::new(b"dir"));
870 assert_eq!(
888 assert_eq!(
871 matcher.visit_children_set(HgPath::new(b"")),
889 matcher.visit_children_set(HgPath::new(b"")),
872 VisitChildrenSet::Set(set)
890 VisitChildrenSet::Set(set)
873 );
891 );
874
892
875 let mut set = HashSet::new();
893 let mut set = HashSet::new();
876 set.insert(HgPath::new(b"subdir"));
894 set.insert(HgPath::new(b"subdir"));
877 assert_eq!(
895 assert_eq!(
878 matcher.visit_children_set(HgPath::new(b"dir")),
896 matcher.visit_children_set(HgPath::new(b"dir")),
879 VisitChildrenSet::Set(set)
897 VisitChildrenSet::Set(set)
880 );
898 );
881
899
882 assert_eq!(
900 assert_eq!(
883 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
901 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
884 VisitChildrenSet::This
902 VisitChildrenSet::This
885 );
903 );
886 assert_eq!(
904 assert_eq!(
887 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
905 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
888 VisitChildrenSet::Empty
906 VisitChildrenSet::Empty
889 );
907 );
890 assert_eq!(
908 assert_eq!(
891 matcher.visit_children_set(HgPath::new(b"folder")),
909 matcher.visit_children_set(HgPath::new(b"folder")),
892 VisitChildrenSet::Empty
910 VisitChildrenSet::Empty
893 );
911 );
894
912
895 // VisitchildrensetGlob
913 // VisitchildrensetGlob
896 let (matcher, _) = IncludeMatcher::new(
914 let (matcher, _) = IncludeMatcher::new(
897 vec![IgnorePattern::new(
915 vec![IgnorePattern::new(
898 PatternSyntax::Glob,
916 PatternSyntax::Glob,
899 b"dir/z*",
917 b"dir/z*",
900 Path::new(""),
918 Path::new(""),
901 )],
919 )],
902 "",
920 "",
903 )
921 )
904 .unwrap();
922 .unwrap();
905
923
906 let mut set = HashSet::new();
924 let mut set = HashSet::new();
907 set.insert(HgPath::new(b"dir"));
925 set.insert(HgPath::new(b"dir"));
908 assert_eq!(
926 assert_eq!(
909 matcher.visit_children_set(HgPath::new(b"")),
927 matcher.visit_children_set(HgPath::new(b"")),
910 VisitChildrenSet::Set(set)
928 VisitChildrenSet::Set(set)
911 );
929 );
912 assert_eq!(
930 assert_eq!(
913 matcher.visit_children_set(HgPath::new(b"folder")),
931 matcher.visit_children_set(HgPath::new(b"folder")),
914 VisitChildrenSet::Empty
932 VisitChildrenSet::Empty
915 );
933 );
916 assert_eq!(
934 assert_eq!(
917 matcher.visit_children_set(HgPath::new(b"dir")),
935 matcher.visit_children_set(HgPath::new(b"dir")),
918 VisitChildrenSet::This
936 VisitChildrenSet::This
919 );
937 );
920 // OPT: these should probably be set().
938 // OPT: these should probably be set().
921 assert_eq!(
939 assert_eq!(
922 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
940 matcher.visit_children_set(HgPath::new(b"dir/subdir")),
923 VisitChildrenSet::This
941 VisitChildrenSet::This
924 );
942 );
925 assert_eq!(
943 assert_eq!(
926 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
944 matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
927 VisitChildrenSet::This
945 VisitChildrenSet::This
928 );
946 );
929 }
947 }
930 }
948 }
General Comments 0
You need to be logged in to leave comments. Login now