##// END OF EJS Templates
rust-regex: increase the DFA size limit for the `regex` crate...
Raphaël Gomès -
r45286:b15a37d8 stable
parent child Browse files
Show More
@@ -1,926 +1,930 b''
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 #[cfg(feature = "with-re2")]
10 #[cfg(feature = "with-re2")]
11 use crate::re2::Re2;
11 use crate::re2::Re2;
12 use crate::{
12 use crate::{
13 dirstate::dirs_multiset::DirsChildrenMultiset,
13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 filepatterns::{
14 filepatterns::{
15 build_single_regex, filter_subincludes, get_patterns_from_file,
15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 PatternFileWarning, PatternResult, SubInclude,
16 PatternFileWarning, PatternResult, SubInclude,
17 },
17 },
18 utils::{
18 utils::{
19 files::find_dirs,
19 files::find_dirs,
20 hg_path::{HgPath, HgPathBuf},
20 hg_path::{HgPath, HgPathBuf},
21 Escaped,
21 Escaped,
22 },
22 },
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 PatternSyntax,
24 PatternSyntax,
25 };
25 };
26
26
27 use std::borrow::ToOwned;
27 use std::borrow::ToOwned;
28 use std::collections::HashSet;
28 use std::collections::HashSet;
29 use std::fmt::{Display, Error, Formatter};
29 use std::fmt::{Display, Error, Formatter};
30 use std::iter::FromIterator;
30 use std::iter::FromIterator;
31 use std::ops::Deref;
31 use std::ops::Deref;
32 use std::path::{Path, PathBuf};
32 use std::path::{Path, PathBuf};
33
33
34 #[derive(Debug, PartialEq)]
34 #[derive(Debug, PartialEq)]
35 pub enum VisitChildrenSet<'a> {
35 pub enum VisitChildrenSet<'a> {
36 /// Don't visit anything
36 /// Don't visit anything
37 Empty,
37 Empty,
38 /// Only visit this directory
38 /// Only visit this directory
39 This,
39 This,
40 /// Visit this directory and these subdirectories
40 /// Visit this directory and these subdirectories
41 /// TODO Should we implement a `NonEmptyHashSet`?
41 /// TODO Should we implement a `NonEmptyHashSet`?
42 Set(HashSet<&'a HgPath>),
42 Set(HashSet<&'a HgPath>),
43 /// Visit this directory and all subdirectories
43 /// Visit this directory and all subdirectories
44 Recursive,
44 Recursive,
45 }
45 }
46
46
47 pub trait Matcher {
47 pub trait Matcher {
48 /// Explicitly listed files
48 /// Explicitly listed files
49 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
49 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
50 /// Returns whether `filename` is in `file_set`
50 /// Returns whether `filename` is in `file_set`
51 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
51 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
52 /// Returns whether `filename` is matched by this matcher
52 /// Returns whether `filename` is matched by this matcher
53 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
53 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
54 /// Decides whether a directory should be visited based on whether it
54 /// Decides whether a directory should be visited based on whether it
55 /// has potential matches in it or one of its subdirectories, and
55 /// has potential matches in it or one of its subdirectories, and
56 /// potentially lists which subdirectories of that directory should be
56 /// potentially lists which subdirectories of that directory should be
57 /// visited. This is based on the match's primary, included, and excluded
57 /// visited. This is based on the match's primary, included, and excluded
58 /// patterns.
58 /// patterns.
59 ///
59 ///
60 /// # Example
60 /// # Example
61 ///
61 ///
62 /// Assuming matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
62 /// Assuming matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
63 /// return the following values (assuming the implementation of
63 /// return the following values (assuming the implementation of
64 /// visit_children_set is capable of recognizing this; some implementations
64 /// visit_children_set is capable of recognizing this; some implementations
65 /// are not).
65 /// are not).
66 ///
66 ///
67 /// ```text
67 /// ```text
68 /// ```ignore
68 /// ```ignore
69 /// '' -> {'foo', 'qux'}
69 /// '' -> {'foo', 'qux'}
70 /// 'baz' -> set()
70 /// 'baz' -> set()
71 /// 'foo' -> {'bar'}
71 /// 'foo' -> {'bar'}
72 /// // Ideally this would be `Recursive`, but since the prefix nature of
72 /// // Ideally this would be `Recursive`, but since the prefix nature of
73 /// // matchers is applied to the entire matcher, we have to downgrade this
73 /// // matchers is applied to the entire matcher, we have to downgrade this
74 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
74 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
75 /// // `RootFilesIn`-kind matcher being mixed in.
75 /// // `RootFilesIn`-kind matcher being mixed in.
76 /// 'foo/bar' -> 'this'
76 /// 'foo/bar' -> 'this'
77 /// 'qux' -> 'this'
77 /// 'qux' -> 'this'
78 /// ```
78 /// ```
79 /// # Important
79 /// # Important
80 ///
80 ///
81 /// Most matchers do not know if they're representing files or
81 /// Most matchers do not know if they're representing files or
82 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
82 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
83 /// file or a directory, so `visit_children_set('dir')` for most matchers
83 /// file or a directory, so `visit_children_set('dir')` for most matchers
84 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
84 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
85 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
85 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
86 /// it may return `VisitChildrenSet::This`.
86 /// it may return `VisitChildrenSet::This`.
87 /// Do not rely on the return being a `HashSet` indicating that there are
87 /// Do not rely on the return being a `HashSet` indicating that there are
88 /// no files in this dir to investigate (or equivalently that if there are
88 /// no files in this dir to investigate (or equivalently that if there are
89 /// files to investigate in 'dir' that it will always return
89 /// files to investigate in 'dir' that it will always return
90 /// `VisitChildrenSet::This`).
90 /// `VisitChildrenSet::This`).
91 fn visit_children_set(
91 fn visit_children_set(
92 &self,
92 &self,
93 directory: impl AsRef<HgPath>,
93 directory: impl AsRef<HgPath>,
94 ) -> VisitChildrenSet;
94 ) -> VisitChildrenSet;
95 /// Matcher will match everything and `file_set()` will be empty:
95 /// Matcher will match everything and `file_set()` will be empty:
96 /// optimization might be possible.
96 /// optimization might be possible.
97 fn matches_everything(&self) -> bool;
97 fn matches_everything(&self) -> bool;
98 /// Matcher will match exactly the files in `file_set()`: optimization
98 /// Matcher will match exactly the files in `file_set()`: optimization
99 /// might be possible.
99 /// might be possible.
100 fn is_exact(&self) -> bool;
100 fn is_exact(&self) -> bool;
101 }
101 }
102
102
103 /// Matches everything.
103 /// Matches everything.
104 ///```
104 ///```
105 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
105 /// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
106 ///
106 ///
107 /// let matcher = AlwaysMatcher;
107 /// let matcher = AlwaysMatcher;
108 ///
108 ///
109 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
109 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
110 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
110 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
111 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
111 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
112 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
113 /// ```
113 /// ```
114 #[derive(Debug)]
114 #[derive(Debug)]
115 pub struct AlwaysMatcher;
115 pub struct AlwaysMatcher;
116
116
117 impl Matcher for AlwaysMatcher {
117 impl Matcher for AlwaysMatcher {
118 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
118 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
119 None
119 None
120 }
120 }
121 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
121 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
122 false
122 false
123 }
123 }
124 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
124 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
125 true
125 true
126 }
126 }
127 fn visit_children_set(
127 fn visit_children_set(
128 &self,
128 &self,
129 _directory: impl AsRef<HgPath>,
129 _directory: impl AsRef<HgPath>,
130 ) -> VisitChildrenSet {
130 ) -> VisitChildrenSet {
131 VisitChildrenSet::Recursive
131 VisitChildrenSet::Recursive
132 }
132 }
133 fn matches_everything(&self) -> bool {
133 fn matches_everything(&self) -> bool {
134 true
134 true
135 }
135 }
136 fn is_exact(&self) -> bool {
136 fn is_exact(&self) -> bool {
137 false
137 false
138 }
138 }
139 }
139 }
140
140
141 /// Matches the input files exactly. They are interpreted as paths, not
141 /// Matches the input files exactly. They are interpreted as paths, not
142 /// patterns.
142 /// patterns.
143 ///
143 ///
144 ///```
144 ///```
145 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
145 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
146 ///
146 ///
147 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
147 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
148 /// let matcher = FileMatcher::new(&files).unwrap();
148 /// let matcher = FileMatcher::new(&files).unwrap();
149 ///
149 ///
150 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
150 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
151 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
151 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
152 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
152 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
153 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
153 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
154 /// ```
154 /// ```
155 #[derive(Debug)]
155 #[derive(Debug)]
156 pub struct FileMatcher<'a> {
156 pub struct FileMatcher<'a> {
157 files: HashSet<&'a HgPath>,
157 files: HashSet<&'a HgPath>,
158 dirs: DirsMultiset,
158 dirs: DirsMultiset,
159 }
159 }
160
160
161 impl<'a> FileMatcher<'a> {
161 impl<'a> FileMatcher<'a> {
162 pub fn new(
162 pub fn new(
163 files: &'a [impl AsRef<HgPath>],
163 files: &'a [impl AsRef<HgPath>],
164 ) -> Result<Self, DirstateMapError> {
164 ) -> Result<Self, DirstateMapError> {
165 Ok(Self {
165 Ok(Self {
166 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
166 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
167 dirs: DirsMultiset::from_manifest(files)?,
167 dirs: DirsMultiset::from_manifest(files)?,
168 })
168 })
169 }
169 }
170 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
170 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
171 self.files.contains(filename.as_ref())
171 self.files.contains(filename.as_ref())
172 }
172 }
173 }
173 }
174
174
175 impl<'a> Matcher for FileMatcher<'a> {
175 impl<'a> Matcher for FileMatcher<'a> {
176 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
176 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
177 Some(&self.files)
177 Some(&self.files)
178 }
178 }
179 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
179 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
180 self.inner_matches(filename)
180 self.inner_matches(filename)
181 }
181 }
182 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
182 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
183 self.inner_matches(filename)
183 self.inner_matches(filename)
184 }
184 }
185 fn visit_children_set(
185 fn visit_children_set(
186 &self,
186 &self,
187 directory: impl AsRef<HgPath>,
187 directory: impl AsRef<HgPath>,
188 ) -> VisitChildrenSet {
188 ) -> VisitChildrenSet {
189 if self.files.is_empty() || !self.dirs.contains(&directory) {
189 if self.files.is_empty() || !self.dirs.contains(&directory) {
190 return VisitChildrenSet::Empty;
190 return VisitChildrenSet::Empty;
191 }
191 }
192 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
192 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
193
193
194 let mut candidates: HashSet<&HgPath> =
194 let mut candidates: HashSet<&HgPath> =
195 self.files.union(&dirs_as_set).map(|k| *k).collect();
195 self.files.union(&dirs_as_set).map(|k| *k).collect();
196 candidates.remove(HgPath::new(b""));
196 candidates.remove(HgPath::new(b""));
197
197
198 if !directory.as_ref().is_empty() {
198 if !directory.as_ref().is_empty() {
199 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
199 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
200 candidates = candidates
200 candidates = candidates
201 .iter()
201 .iter()
202 .filter_map(|c| {
202 .filter_map(|c| {
203 if c.as_bytes().starts_with(&directory) {
203 if c.as_bytes().starts_with(&directory) {
204 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
204 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
205 } else {
205 } else {
206 None
206 None
207 }
207 }
208 })
208 })
209 .collect();
209 .collect();
210 }
210 }
211
211
212 // `self.dirs` includes all of the directories, recursively, so if
212 // `self.dirs` includes all of the directories, recursively, so if
213 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
213 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
214 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
214 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
215 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
215 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
216 // subdir will be in there without a slash.
216 // subdir will be in there without a slash.
217 VisitChildrenSet::Set(
217 VisitChildrenSet::Set(
218 candidates
218 candidates
219 .iter()
219 .iter()
220 .filter_map(|c| {
220 .filter_map(|c| {
221 if c.bytes().all(|b| *b != b'/') {
221 if c.bytes().all(|b| *b != b'/') {
222 Some(*c)
222 Some(*c)
223 } else {
223 } else {
224 None
224 None
225 }
225 }
226 })
226 })
227 .collect(),
227 .collect(),
228 )
228 )
229 }
229 }
230 fn matches_everything(&self) -> bool {
230 fn matches_everything(&self) -> bool {
231 false
231 false
232 }
232 }
233 fn is_exact(&self) -> bool {
233 fn is_exact(&self) -> bool {
234 true
234 true
235 }
235 }
236 }
236 }
237
237
238 /// Matches files that are included in the ignore rules.
238 /// Matches files that are included in the ignore rules.
239 #[cfg_attr(
239 #[cfg_attr(
240 feature = "with-re2",
240 feature = "with-re2",
241 doc = r##"
241 doc = r##"
242 ```
242 ```
243 use hg::{
243 use hg::{
244 matchers::{IncludeMatcher, Matcher},
244 matchers::{IncludeMatcher, Matcher},
245 IgnorePattern,
245 IgnorePattern,
246 PatternSyntax,
246 PatternSyntax,
247 utils::hg_path::HgPath
247 utils::hg_path::HgPath
248 };
248 };
249 use std::path::Path;
249 use std::path::Path;
250 ///
250 ///
251 let ignore_patterns =
251 let ignore_patterns =
252 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
252 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
253 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
253 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
254 ///
254 ///
255 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
255 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
256 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
256 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
257 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
257 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
258 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
258 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
259 ```
259 ```
260 "##
260 "##
261 )]
261 )]
262 pub struct IncludeMatcher<'a> {
262 pub struct IncludeMatcher<'a> {
263 patterns: Vec<u8>,
263 patterns: Vec<u8>,
264 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
264 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
265 /// Whether all the patterns match a prefix (i.e. recursively)
265 /// Whether all the patterns match a prefix (i.e. recursively)
266 prefix: bool,
266 prefix: bool,
267 roots: HashSet<HgPathBuf>,
267 roots: HashSet<HgPathBuf>,
268 dirs: HashSet<HgPathBuf>,
268 dirs: HashSet<HgPathBuf>,
269 parents: HashSet<HgPathBuf>,
269 parents: HashSet<HgPathBuf>,
270 }
270 }
271
271
272 impl<'a> Matcher for IncludeMatcher<'a> {
272 impl<'a> Matcher for IncludeMatcher<'a> {
273 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
273 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
274 None
274 None
275 }
275 }
276
276
277 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
277 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
278 false
278 false
279 }
279 }
280
280
281 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
281 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
282 (self.match_fn)(filename.as_ref())
282 (self.match_fn)(filename.as_ref())
283 }
283 }
284
284
285 fn visit_children_set(
285 fn visit_children_set(
286 &self,
286 &self,
287 directory: impl AsRef<HgPath>,
287 directory: impl AsRef<HgPath>,
288 ) -> VisitChildrenSet {
288 ) -> VisitChildrenSet {
289 let dir = directory.as_ref();
289 let dir = directory.as_ref();
290 if self.prefix && self.roots.contains(dir) {
290 if self.prefix && self.roots.contains(dir) {
291 return VisitChildrenSet::Recursive;
291 return VisitChildrenSet::Recursive;
292 }
292 }
293 if self.roots.contains(HgPath::new(b""))
293 if self.roots.contains(HgPath::new(b""))
294 || self.roots.contains(dir)
294 || self.roots.contains(dir)
295 || self.dirs.contains(dir)
295 || self.dirs.contains(dir)
296 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
296 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
297 {
297 {
298 return VisitChildrenSet::This;
298 return VisitChildrenSet::This;
299 }
299 }
300
300
301 if self.parents.contains(directory.as_ref()) {
301 if self.parents.contains(directory.as_ref()) {
302 let multiset = self.get_all_parents_children();
302 let multiset = self.get_all_parents_children();
303 if let Some(children) = multiset.get(dir) {
303 if let Some(children) = multiset.get(dir) {
304 return VisitChildrenSet::Set(children.to_owned());
304 return VisitChildrenSet::Set(children.to_owned());
305 }
305 }
306 }
306 }
307 VisitChildrenSet::Empty
307 VisitChildrenSet::Empty
308 }
308 }
309
309
310 fn matches_everything(&self) -> bool {
310 fn matches_everything(&self) -> bool {
311 false
311 false
312 }
312 }
313
313
314 fn is_exact(&self) -> bool {
314 fn is_exact(&self) -> bool {
315 false
315 false
316 }
316 }
317 }
317 }
318
318
319 #[cfg(feature = "with-re2")]
319 #[cfg(feature = "with-re2")]
320 /// Returns a function that matches an `HgPath` against the given regex
320 /// Returns a function that matches an `HgPath` against the given regex
321 /// pattern.
321 /// pattern.
322 ///
322 ///
323 /// This can fail when the pattern is invalid or not supported by the
323 /// This can fail when the pattern is invalid or not supported by the
324 /// underlying engine `Re2`, for instance anything with back-references.
324 /// underlying engine `Re2`, for instance anything with back-references.
325 fn re_matcher(
325 fn re_matcher(
326 pattern: &[u8],
326 pattern: &[u8],
327 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
327 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
328 let regex = Re2::new(pattern);
328 let regex = Re2::new(pattern);
329 let regex = regex.map_err(|e| PatternError::UnsupportedSyntax(e))?;
329 let regex = regex.map_err(|e| PatternError::UnsupportedSyntax(e))?;
330 Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
330 Ok(move |path: &HgPath| regex.is_match(path.as_bytes()))
331 }
331 }
332
332
333 #[cfg(not(feature = "with-re2"))]
333 #[cfg(not(feature = "with-re2"))]
334 /// Returns a function that matches an `HgPath` against the given regex
334 /// Returns a function that matches an `HgPath` against the given regex
335 /// pattern.
335 /// pattern.
336 ///
336 ///
337 /// This can fail when the pattern is invalid or not supported by the
337 /// This can fail when the pattern is invalid or not supported by the
338 /// underlying engine (the `regex` crate), for instance anything with
338 /// underlying engine (the `regex` crate), for instance anything with
339 /// back-references.
339 /// back-references.
340 fn re_matcher(
340 fn re_matcher(
341 pattern: &[u8],
341 pattern: &[u8],
342 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
342 ) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
343 use std::io::Write;
343 use std::io::Write;
344
344
345 let mut escaped_bytes = vec![];
345 let mut escaped_bytes = vec![];
346 for byte in pattern {
346 for byte in pattern {
347 if *byte > 127 {
347 if *byte > 127 {
348 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
348 write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
349 } else {
349 } else {
350 escaped_bytes.push(*byte);
350 escaped_bytes.push(*byte);
351 }
351 }
352 }
352 }
353
353
354 // Avoid the cost of UTF8 checking
354 // Avoid the cost of UTF8 checking
355 //
355 //
356 // # Safety
356 // # Safety
357 // This is safe because we escaped all non-ASCII bytes.
357 // This is safe because we escaped all non-ASCII bytes.
358 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
358 let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
359 let re = regex::bytes::RegexBuilder::new(&pattern_string)
359 let re = regex::bytes::RegexBuilder::new(&pattern_string)
360 .unicode(false)
360 .unicode(false)
361 // Big repos with a big `.hgignore` will hit the default limit and
362 // incur a significant performance hit. One repo's `hg status` took
363 // multiple *minutes*.
364 .dfa_size_limit(50 * (1 << 20))
361 .build()
365 .build()
362 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
366 .map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
363
367
364 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
368 Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
365 }
369 }
366
370
367 /// Returns the regex pattern and a function that matches an `HgPath` against
371 /// Returns the regex pattern and a function that matches an `HgPath` against
368 /// said regex formed by the given ignore patterns.
372 /// said regex formed by the given ignore patterns.
369 fn build_regex_match<'a>(
373 fn build_regex_match<'a>(
370 ignore_patterns: &'a [&'a IgnorePattern],
374 ignore_patterns: &'a [&'a IgnorePattern],
371 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
375 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
372 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
376 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
373 .into_iter()
377 .into_iter()
374 .map(|k| build_single_regex(*k))
378 .map(|k| build_single_regex(*k))
375 .collect();
379 .collect();
376 let regexps = regexps?;
380 let regexps = regexps?;
377 let full_regex = regexps.join(&b'|');
381 let full_regex = regexps.join(&b'|');
378
382
379 let matcher = re_matcher(&full_regex)?;
383 let matcher = re_matcher(&full_regex)?;
380 let func = Box::new(move |filename: &HgPath| matcher(filename));
384 let func = Box::new(move |filename: &HgPath| matcher(filename));
381
385
382 Ok((full_regex, func))
386 Ok((full_regex, func))
383 }
387 }
384
388
385 /// Returns roots and directories corresponding to each pattern.
389 /// Returns roots and directories corresponding to each pattern.
386 ///
390 ///
387 /// This calculates the roots and directories exactly matching the patterns and
391 /// This calculates the roots and directories exactly matching the patterns and
388 /// returns a tuple of (roots, dirs). It does not return other directories
392 /// returns a tuple of (roots, dirs). It does not return other directories
389 /// which may also need to be considered, like the parent directories.
393 /// which may also need to be considered, like the parent directories.
390 fn roots_and_dirs(
394 fn roots_and_dirs(
391 ignore_patterns: &[IgnorePattern],
395 ignore_patterns: &[IgnorePattern],
392 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
396 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
393 let mut roots = Vec::new();
397 let mut roots = Vec::new();
394 let mut dirs = Vec::new();
398 let mut dirs = Vec::new();
395
399
396 for ignore_pattern in ignore_patterns {
400 for ignore_pattern in ignore_patterns {
397 let IgnorePattern {
401 let IgnorePattern {
398 syntax, pattern, ..
402 syntax, pattern, ..
399 } = ignore_pattern;
403 } = ignore_pattern;
400 match syntax {
404 match syntax {
401 PatternSyntax::RootGlob | PatternSyntax::Glob => {
405 PatternSyntax::RootGlob | PatternSyntax::Glob => {
402 let mut root = vec![];
406 let mut root = vec![];
403
407
404 for p in pattern.split(|c| *c == b'/') {
408 for p in pattern.split(|c| *c == b'/') {
405 if p.iter().any(|c| match *c {
409 if p.iter().any(|c| match *c {
406 b'[' | b'{' | b'*' | b'?' => true,
410 b'[' | b'{' | b'*' | b'?' => true,
407 _ => false,
411 _ => false,
408 }) {
412 }) {
409 break;
413 break;
410 }
414 }
411 root.push(HgPathBuf::from_bytes(p));
415 root.push(HgPathBuf::from_bytes(p));
412 }
416 }
413 let buf =
417 let buf =
414 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
418 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
415 roots.push(buf);
419 roots.push(buf);
416 }
420 }
417 PatternSyntax::Path | PatternSyntax::RelPath => {
421 PatternSyntax::Path | PatternSyntax::RelPath => {
418 let pat = HgPath::new(if pattern == b"." {
422 let pat = HgPath::new(if pattern == b"." {
419 &[] as &[u8]
423 &[] as &[u8]
420 } else {
424 } else {
421 pattern
425 pattern
422 });
426 });
423 roots.push(pat.to_owned());
427 roots.push(pat.to_owned());
424 }
428 }
425 PatternSyntax::RootFiles => {
429 PatternSyntax::RootFiles => {
426 let pat = if pattern == b"." {
430 let pat = if pattern == b"." {
427 &[] as &[u8]
431 &[] as &[u8]
428 } else {
432 } else {
429 pattern
433 pattern
430 };
434 };
431 dirs.push(HgPathBuf::from_bytes(pat));
435 dirs.push(HgPathBuf::from_bytes(pat));
432 }
436 }
433 _ => {
437 _ => {
434 roots.push(HgPathBuf::new());
438 roots.push(HgPathBuf::new());
435 }
439 }
436 }
440 }
437 }
441 }
438 (roots, dirs)
442 (roots, dirs)
439 }
443 }
440
444
441 /// Paths extracted from patterns
445 /// Paths extracted from patterns
442 #[derive(Debug, PartialEq)]
446 #[derive(Debug, PartialEq)]
443 struct RootsDirsAndParents {
447 struct RootsDirsAndParents {
444 /// Directories to match recursively
448 /// Directories to match recursively
445 pub roots: HashSet<HgPathBuf>,
449 pub roots: HashSet<HgPathBuf>,
446 /// Directories to match non-recursively
450 /// Directories to match non-recursively
447 pub dirs: HashSet<HgPathBuf>,
451 pub dirs: HashSet<HgPathBuf>,
448 /// Implicitly required directories to go to items in either roots or dirs
452 /// Implicitly required directories to go to items in either roots or dirs
449 pub parents: HashSet<HgPathBuf>,
453 pub parents: HashSet<HgPathBuf>,
450 }
454 }
451
455
452 /// Extract roots, dirs and parents from patterns.
456 /// Extract roots, dirs and parents from patterns.
453 fn roots_dirs_and_parents(
457 fn roots_dirs_and_parents(
454 ignore_patterns: &[IgnorePattern],
458 ignore_patterns: &[IgnorePattern],
455 ) -> PatternResult<RootsDirsAndParents> {
459 ) -> PatternResult<RootsDirsAndParents> {
456 let (roots, dirs) = roots_and_dirs(ignore_patterns);
460 let (roots, dirs) = roots_and_dirs(ignore_patterns);
457
461
458 let mut parents = HashSet::new();
462 let mut parents = HashSet::new();
459
463
460 parents.extend(
464 parents.extend(
461 DirsMultiset::from_manifest(&dirs)
465 DirsMultiset::from_manifest(&dirs)
462 .map_err(|e| match e {
466 .map_err(|e| match e {
463 DirstateMapError::InvalidPath(e) => e,
467 DirstateMapError::InvalidPath(e) => e,
464 _ => unreachable!(),
468 _ => unreachable!(),
465 })?
469 })?
466 .iter()
470 .iter()
467 .map(|k| k.to_owned()),
471 .map(|k| k.to_owned()),
468 );
472 );
469 parents.extend(
473 parents.extend(
470 DirsMultiset::from_manifest(&roots)
474 DirsMultiset::from_manifest(&roots)
471 .map_err(|e| match e {
475 .map_err(|e| match e {
472 DirstateMapError::InvalidPath(e) => e,
476 DirstateMapError::InvalidPath(e) => e,
473 _ => unreachable!(),
477 _ => unreachable!(),
474 })?
478 })?
475 .iter()
479 .iter()
476 .map(|k| k.to_owned()),
480 .map(|k| k.to_owned()),
477 );
481 );
478
482
479 Ok(RootsDirsAndParents {
483 Ok(RootsDirsAndParents {
480 roots: HashSet::from_iter(roots),
484 roots: HashSet::from_iter(roots),
481 dirs: HashSet::from_iter(dirs),
485 dirs: HashSet::from_iter(dirs),
482 parents,
486 parents,
483 })
487 })
484 }
488 }
485
489
486 /// Returns a function that checks whether a given file (in the general sense)
490 /// Returns a function that checks whether a given file (in the general sense)
487 /// should be matched.
491 /// should be matched.
488 fn build_match<'a, 'b>(
492 fn build_match<'a, 'b>(
489 ignore_patterns: &'a [IgnorePattern],
493 ignore_patterns: &'a [IgnorePattern],
490 root_dir: impl AsRef<Path>,
494 root_dir: impl AsRef<Path>,
491 ) -> PatternResult<(
495 ) -> PatternResult<(
492 Vec<u8>,
496 Vec<u8>,
493 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
497 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
494 Vec<PatternFileWarning>,
498 Vec<PatternFileWarning>,
495 )> {
499 )> {
496 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
500 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
497 // For debugging and printing
501 // For debugging and printing
498 let mut patterns = vec![];
502 let mut patterns = vec![];
499 let mut all_warnings = vec![];
503 let mut all_warnings = vec![];
500
504
501 let (subincludes, ignore_patterns) =
505 let (subincludes, ignore_patterns) =
502 filter_subincludes(ignore_patterns, root_dir)?;
506 filter_subincludes(ignore_patterns, root_dir)?;
503
507
504 if !subincludes.is_empty() {
508 if !subincludes.is_empty() {
505 // Build prefix-based matcher functions for subincludes
509 // Build prefix-based matcher functions for subincludes
506 let mut submatchers = FastHashMap::default();
510 let mut submatchers = FastHashMap::default();
507 let mut prefixes = vec![];
511 let mut prefixes = vec![];
508
512
509 for SubInclude { prefix, root, path } in subincludes.into_iter() {
513 for SubInclude { prefix, root, path } in subincludes.into_iter() {
510 let (match_fn, warnings) =
514 let (match_fn, warnings) =
511 get_ignore_function(vec![path.to_path_buf()], root)?;
515 get_ignore_function(vec![path.to_path_buf()], root)?;
512 all_warnings.extend(warnings);
516 all_warnings.extend(warnings);
513 prefixes.push(prefix.to_owned());
517 prefixes.push(prefix.to_owned());
514 submatchers.insert(prefix.to_owned(), match_fn);
518 submatchers.insert(prefix.to_owned(), match_fn);
515 }
519 }
516
520
517 let match_subinclude = move |filename: &HgPath| {
521 let match_subinclude = move |filename: &HgPath| {
518 for prefix in prefixes.iter() {
522 for prefix in prefixes.iter() {
519 if let Some(rel) = filename.relative_to(prefix) {
523 if let Some(rel) = filename.relative_to(prefix) {
520 if (submatchers.get(prefix).unwrap())(rel) {
524 if (submatchers.get(prefix).unwrap())(rel) {
521 return true;
525 return true;
522 }
526 }
523 }
527 }
524 }
528 }
525 false
529 false
526 };
530 };
527
531
528 match_funcs.push(Box::new(match_subinclude));
532 match_funcs.push(Box::new(match_subinclude));
529 }
533 }
530
534
531 if !ignore_patterns.is_empty() {
535 if !ignore_patterns.is_empty() {
532 // Either do dumb matching if all patterns are rootfiles, or match
536 // Either do dumb matching if all patterns are rootfiles, or match
533 // with a regex.
537 // with a regex.
534 if ignore_patterns
538 if ignore_patterns
535 .iter()
539 .iter()
536 .all(|k| k.syntax == PatternSyntax::RootFiles)
540 .all(|k| k.syntax == PatternSyntax::RootFiles)
537 {
541 {
538 let dirs: HashSet<_> = ignore_patterns
542 let dirs: HashSet<_> = ignore_patterns
539 .iter()
543 .iter()
540 .map(|k| k.pattern.to_owned())
544 .map(|k| k.pattern.to_owned())
541 .collect();
545 .collect();
542 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
546 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
543
547
544 let match_func = move |path: &HgPath| -> bool {
548 let match_func = move |path: &HgPath| -> bool {
545 let path = path.as_bytes();
549 let path = path.as_bytes();
546 let i = path.iter().rfind(|a| **a == b'/');
550 let i = path.iter().rfind(|a| **a == b'/');
547 let dir = if let Some(i) = i {
551 let dir = if let Some(i) = i {
548 &path[..*i as usize]
552 &path[..*i as usize]
549 } else {
553 } else {
550 b"."
554 b"."
551 };
555 };
552 dirs.contains(dir.deref())
556 dirs.contains(dir.deref())
553 };
557 };
554 match_funcs.push(Box::new(match_func));
558 match_funcs.push(Box::new(match_func));
555
559
556 patterns.extend(b"rootfilesin: ");
560 patterns.extend(b"rootfilesin: ");
557 dirs_vec.sort();
561 dirs_vec.sort();
558 patterns.extend(dirs_vec.escaped_bytes());
562 patterns.extend(dirs_vec.escaped_bytes());
559 } else {
563 } else {
560 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
564 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
561 patterns = new_re;
565 patterns = new_re;
562 match_funcs.push(match_func)
566 match_funcs.push(match_func)
563 }
567 }
564 }
568 }
565
569
566 Ok(if match_funcs.len() == 1 {
570 Ok(if match_funcs.len() == 1 {
567 (patterns, match_funcs.remove(0), all_warnings)
571 (patterns, match_funcs.remove(0), all_warnings)
568 } else {
572 } else {
569 (
573 (
570 patterns,
574 patterns,
571 Box::new(move |f: &HgPath| -> bool {
575 Box::new(move |f: &HgPath| -> bool {
572 match_funcs.iter().any(|match_func| match_func(f))
576 match_funcs.iter().any(|match_func| match_func(f))
573 }),
577 }),
574 all_warnings,
578 all_warnings,
575 )
579 )
576 })
580 })
577 }
581 }
578
582
579 /// Parses all "ignore" files with their recursive includes and returns a
583 /// Parses all "ignore" files with their recursive includes and returns a
580 /// function that checks whether a given file (in the general sense) should be
584 /// function that checks whether a given file (in the general sense) should be
581 /// ignored.
585 /// ignored.
582 pub fn get_ignore_function<'a>(
586 pub fn get_ignore_function<'a>(
583 all_pattern_files: Vec<PathBuf>,
587 all_pattern_files: Vec<PathBuf>,
584 root_dir: impl AsRef<Path>,
588 root_dir: impl AsRef<Path>,
585 ) -> PatternResult<(
589 ) -> PatternResult<(
586 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
590 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
587 Vec<PatternFileWarning>,
591 Vec<PatternFileWarning>,
588 )> {
592 )> {
589 let mut all_patterns = vec![];
593 let mut all_patterns = vec![];
590 let mut all_warnings = vec![];
594 let mut all_warnings = vec![];
591
595
592 for pattern_file in all_pattern_files.into_iter() {
596 for pattern_file in all_pattern_files.into_iter() {
593 let (patterns, warnings) =
597 let (patterns, warnings) =
594 get_patterns_from_file(pattern_file, &root_dir)?;
598 get_patterns_from_file(pattern_file, &root_dir)?;
595
599
596 all_patterns.extend(patterns.to_owned());
600 all_patterns.extend(patterns.to_owned());
597 all_warnings.extend(warnings);
601 all_warnings.extend(warnings);
598 }
602 }
599 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
603 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
600 all_warnings.extend(warnings);
604 all_warnings.extend(warnings);
601 Ok((
605 Ok((
602 Box::new(move |path: &HgPath| matcher.matches(path)),
606 Box::new(move |path: &HgPath| matcher.matches(path)),
603 all_warnings,
607 all_warnings,
604 ))
608 ))
605 }
609 }
606
610
607 impl<'a> IncludeMatcher<'a> {
611 impl<'a> IncludeMatcher<'a> {
608 pub fn new(
612 pub fn new(
609 ignore_patterns: Vec<IgnorePattern>,
613 ignore_patterns: Vec<IgnorePattern>,
610 root_dir: impl AsRef<Path>,
614 root_dir: impl AsRef<Path>,
611 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
615 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
612 let (patterns, match_fn, warnings) =
616 let (patterns, match_fn, warnings) =
613 build_match(&ignore_patterns, root_dir)?;
617 build_match(&ignore_patterns, root_dir)?;
614 let RootsDirsAndParents {
618 let RootsDirsAndParents {
615 roots,
619 roots,
616 dirs,
620 dirs,
617 parents,
621 parents,
618 } = roots_dirs_and_parents(&ignore_patterns)?;
622 } = roots_dirs_and_parents(&ignore_patterns)?;
619
623
620 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
624 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
621 PatternSyntax::Path | PatternSyntax::RelPath => true,
625 PatternSyntax::Path | PatternSyntax::RelPath => true,
622 _ => false,
626 _ => false,
623 });
627 });
624
628
625 Ok((
629 Ok((
626 Self {
630 Self {
627 patterns,
631 patterns,
628 match_fn,
632 match_fn,
629 prefix,
633 prefix,
630 roots,
634 roots,
631 dirs,
635 dirs,
632 parents,
636 parents,
633 },
637 },
634 warnings,
638 warnings,
635 ))
639 ))
636 }
640 }
637
641
638 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
642 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
639 // TODO cache
643 // TODO cache
640 let thing = self
644 let thing = self
641 .dirs
645 .dirs
642 .iter()
646 .iter()
643 .chain(self.roots.iter())
647 .chain(self.roots.iter())
644 .chain(self.parents.iter());
648 .chain(self.parents.iter());
645 DirsChildrenMultiset::new(thing, Some(&self.parents))
649 DirsChildrenMultiset::new(thing, Some(&self.parents))
646 }
650 }
647 }
651 }
648
652
649 impl<'a> Display for IncludeMatcher<'a> {
653 impl<'a> Display for IncludeMatcher<'a> {
650 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
654 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
651 write!(
655 write!(
652 f,
656 f,
653 "IncludeMatcher(includes='{}')",
657 "IncludeMatcher(includes='{}')",
654 String::from_utf8_lossy(&self.patterns.escaped_bytes())
658 String::from_utf8_lossy(&self.patterns.escaped_bytes())
655 )
659 )
656 }
660 }
657 }
661 }
658
662
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Builds a `HashSet` holding `HgPath::new(..)` of each byte string.
    macro_rules! path_set {
        ($($bytes:expr),*) => {{
            let mut set = HashSet::new();
            $(set.insert(HgPath::new($bytes));)*
            set
        }};
    }

    /// Asserts the value of `visit_children_set` for the given directory.
    macro_rules! assert_visit {
        ($matcher:expr, $dir:expr, $expected:expr) => {
            assert_eq!(
                $matcher.visit_children_set(HgPath::new($dir)),
                $expected
            )
        };
    }

    /// The glob patterns shared by the roots/dirs tests.
    fn glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    #[test]
    fn test_roots_and_dirs() {
        let pats = glob_patterns();
        let (roots, dirs) = roots_and_dirs(&pats);

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, vec![]);
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let pats = glob_patterns();

        let roots: HashSet<_> =
            vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect();
        let dirs = HashSet::new();
        let parents: HashSet<_> =
            vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect();

        let expected = RootsDirsAndParents {
            roots,
            dirs,
            parents,
        };
        assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_visit!(matcher, b"", VisitChildrenSet::Set(path_set![b"dir"]));
        assert_visit!(
            matcher,
            b"dir",
            VisitChildrenSet::Set(path_set![b"subdir"])
        );
        assert_visit!(
            matcher,
            b"dir/subdir",
            VisitChildrenSet::Set(path_set![b"foo.txt"])
        );

        assert_visit!(matcher, b"dir/subdir/x", VisitChildrenSet::Empty);
        assert_visit!(matcher, b"dir/subdir/foo.txt", VisitChildrenSet::Empty);
        assert_visit!(matcher, b"folder", VisitChildrenSet::Empty);
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPath::new(b"rootfile.txt"),
            HgPath::new(b"a/file1.txt"),
            HgPath::new(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPath::new(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        assert_visit!(
            matcher,
            b"",
            VisitChildrenSet::Set(path_set![b"a", b"rootfile.txt"])
        );
        assert_visit!(
            matcher,
            b"a",
            VisitChildrenSet::Set(path_set![b"b", b"file1.txt"])
        );
        assert_visit!(
            matcher,
            b"a/b",
            VisitChildrenSet::Set(path_set![b"c", b"file2.txt"])
        );
        assert_visit!(
            matcher,
            b"a/b/c",
            VisitChildrenSet::Set(path_set![b"d"])
        );
        assert_visit!(
            matcher,
            b"a/b/c/d",
            VisitChildrenSet::Set(path_set![b"file4.txt"])
        );

        assert_visit!(matcher, b"a/b/c/d/e", VisitChildrenSet::Empty);
        assert_visit!(matcher, b"folder", VisitChildrenSet::Empty);
    }

    #[cfg(feature = "with-re2")]
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let relpath = IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) = IncludeMatcher::new(vec![relpath], "").unwrap();

        assert_visit!(matcher, b"", VisitChildrenSet::Set(path_set![b"dir"]));
        assert_visit!(
            matcher,
            b"dir",
            VisitChildrenSet::Set(path_set![b"subdir"])
        );
        assert_visit!(matcher, b"dir/subdir", VisitChildrenSet::Recursive);
        // OPT: This should probably be 'all' if its parent is?
        assert_visit!(matcher, b"dir/subdir/x", VisitChildrenSet::This);
        assert_visit!(matcher, b"folder", VisitChildrenSet::Empty);

        // VisitchildrensetRootfilesin
        let rootfiles = IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        );
        let (matcher, _) = IncludeMatcher::new(vec![rootfiles], "").unwrap();

        assert_visit!(matcher, b"", VisitChildrenSet::Set(path_set![b"dir"]));
        assert_visit!(
            matcher,
            b"dir",
            VisitChildrenSet::Set(path_set![b"subdir"])
        );

        assert_visit!(matcher, b"dir/subdir", VisitChildrenSet::This);
        assert_visit!(matcher, b"dir/subdir/x", VisitChildrenSet::Empty);
        assert_visit!(matcher, b"folder", VisitChildrenSet::Empty);

        // VisitchildrensetGlob
        let glob = IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir/z*",
            Path::new(""),
        );
        let (matcher, _) = IncludeMatcher::new(vec![glob], "").unwrap();

        assert_visit!(matcher, b"", VisitChildrenSet::Set(path_set![b"dir"]));
        assert_visit!(matcher, b"folder", VisitChildrenSet::Empty);
        assert_visit!(matcher, b"dir", VisitChildrenSet::This);
        // OPT: these should probably be set().
        assert_visit!(matcher, b"dir/subdir", VisitChildrenSet::This);
        assert_visit!(matcher, b"dir/subdir/x", VisitChildrenSet::This);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now