##// END OF EJS Templates
rust-status: refactor options into a `StatusOptions` struct...
Raphaël Gomès -
r45011:483fce65 default
parent child Browse files
Show More
@@ -1,335 +1,321 b''
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Rust implementation of dirstate.status (dirstate.py).
8 //! Rust implementation of dirstate.status (dirstate.py).
9 //! It is currently missing a lot of functionality compared to the Python one
9 //! It is currently missing a lot of functionality compared to the Python one
10 //! and will only be triggered in narrow cases.
10 //! and will only be triggered in narrow cases.
11
11
12 use crate::{
12 use crate::{
13 dirstate::SIZE_FROM_OTHER_PARENT,
13 dirstate::SIZE_FROM_OTHER_PARENT,
14 matchers::Matcher,
14 matchers::Matcher,
15 utils::{
15 utils::{
16 files::HgMetadata,
16 files::HgMetadata,
17 hg_path::{
17 hg_path::{
18 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
18 hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
19 },
19 },
20 },
20 },
21 CopyMap, DirstateEntry, DirstateMap, EntryState,
21 CopyMap, DirstateEntry, DirstateMap, EntryState,
22 };
22 };
23 use rayon::prelude::*;
23 use rayon::prelude::*;
24 use std::collections::HashSet;
24 use std::collections::HashSet;
25 use std::fs::{read_dir, DirEntry};
25 use std::fs::{read_dir, DirEntry};
26 use std::path::Path;
26 use std::path::Path;
27
27
28 /// Marker enum used to dispatch new status entries into the right collections.
28 /// Marker enum used to dispatch new status entries into the right collections.
29 /// Is similar to `crate::EntryState`, but represents the transient state of
29 /// Is similar to `crate::EntryState`, but represents the transient state of
30 /// entries during the lifetime of a command.
30 /// entries during the lifetime of a command.
31 enum Dispatch {
31 enum Dispatch {
32 Unsure,
32 Unsure,
33 Modified,
33 Modified,
34 Added,
34 Added,
35 Removed,
35 Removed,
36 Deleted,
36 Deleted,
37 Clean,
37 Clean,
38 Unknown,
38 Unknown,
39 }
39 }
40
40
41 type IoResult<T> = std::io::Result<T>;
41 type IoResult<T> = std::io::Result<T>;
42
42
43 /// Dates and times that are outside the 31-bit signed range are compared
43 /// Dates and times that are outside the 31-bit signed range are compared
44 /// modulo 2^31. This should prevent hg from behaving badly with very large
44 /// modulo 2^31. This should prevent hg from behaving badly with very large
45 /// files or corrupt dates while still having a high probability of detecting
45 /// files or corrupt dates while still having a high probability of detecting
46 /// changes. (issue2608)
46 /// changes. (issue2608)
47 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
47 /// TODO I haven't found a way of having `b` be `Into<i32>`, since `From<u64>`
48 /// is not defined for `i32`, and there is no `As` trait. This forces the
48 /// is not defined for `i32`, and there is no `As` trait. This forces the
49 /// caller to cast `b` as `i32`.
49 /// caller to cast `b` as `i32`.
50 fn mod_compare(a: i32, b: i32) -> bool {
50 fn mod_compare(a: i32, b: i32) -> bool {
51 a & i32::max_value() != b & i32::max_value()
51 a & i32::max_value() != b & i32::max_value()
52 }
52 }
53
53
54 /// Return a sorted list containing information about the entries
54 /// Return a sorted list containing information about the entries
55 /// in the directory.
55 /// in the directory.
56 ///
56 ///
57 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
57 /// * `skip_dot_hg` - Return an empty vec if `path` contains a `.hg` directory
58 fn list_directory(
58 fn list_directory(
59 path: impl AsRef<Path>,
59 path: impl AsRef<Path>,
60 skip_dot_hg: bool,
60 skip_dot_hg: bool,
61 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
61 ) -> std::io::Result<Vec<(HgPathBuf, DirEntry)>> {
62 let mut results = vec![];
62 let mut results = vec![];
63 let entries = read_dir(path.as_ref())?;
63 let entries = read_dir(path.as_ref())?;
64
64
65 for entry in entries {
65 for entry in entries {
66 let entry = entry?;
66 let entry = entry?;
67 let filename = os_string_to_hg_path_buf(entry.file_name())?;
67 let filename = os_string_to_hg_path_buf(entry.file_name())?;
68 let file_type = entry.file_type()?;
68 let file_type = entry.file_type()?;
69 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
69 if skip_dot_hg && filename.as_bytes() == b".hg" && file_type.is_dir() {
70 return Ok(vec![]);
70 return Ok(vec![]);
71 } else {
71 } else {
72 results.push((HgPathBuf::from(filename), entry))
72 results.push((HgPathBuf::from(filename), entry))
73 }
73 }
74 }
74 }
75
75
76 results.sort_unstable_by_key(|e| e.0.clone());
76 results.sort_unstable_by_key(|e| e.0.clone());
77 Ok(results)
77 Ok(results)
78 }
78 }
79
79
80 /// The file corresponding to the dirstate entry was found on the filesystem.
80 /// The file corresponding to the dirstate entry was found on the filesystem.
81 fn dispatch_found(
81 fn dispatch_found(
82 filename: impl AsRef<HgPath>,
82 filename: impl AsRef<HgPath>,
83 entry: DirstateEntry,
83 entry: DirstateEntry,
84 metadata: HgMetadata,
84 metadata: HgMetadata,
85 copy_map: &CopyMap,
85 copy_map: &CopyMap,
86 check_exec: bool,
86 options: StatusOptions,
87 list_clean: bool,
88 last_normal_time: i64,
89 ) -> Dispatch {
87 ) -> Dispatch {
90 let DirstateEntry {
88 let DirstateEntry {
91 state,
89 state,
92 mode,
90 mode,
93 mtime,
91 mtime,
94 size,
92 size,
95 } = entry;
93 } = entry;
96
94
97 let HgMetadata {
95 let HgMetadata {
98 st_mode,
96 st_mode,
99 st_size,
97 st_size,
100 st_mtime,
98 st_mtime,
101 ..
99 ..
102 } = metadata;
100 } = metadata;
103
101
104 match state {
102 match state {
105 EntryState::Normal => {
103 EntryState::Normal => {
106 let size_changed = mod_compare(size, st_size as i32);
104 let size_changed = mod_compare(size, st_size as i32);
107 let mode_changed =
105 let mode_changed =
108 (mode ^ st_mode as i32) & 0o100 != 0o000 && check_exec;
106 (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
109 let metadata_changed = size >= 0 && (size_changed || mode_changed);
107 let metadata_changed = size >= 0 && (size_changed || mode_changed);
110 let other_parent = size == SIZE_FROM_OTHER_PARENT;
108 let other_parent = size == SIZE_FROM_OTHER_PARENT;
111 if metadata_changed
109 if metadata_changed
112 || other_parent
110 || other_parent
113 || copy_map.contains_key(filename.as_ref())
111 || copy_map.contains_key(filename.as_ref())
114 {
112 {
115 Dispatch::Modified
113 Dispatch::Modified
116 } else if mod_compare(mtime, st_mtime as i32) {
114 } else if mod_compare(mtime, st_mtime as i32) {
117 Dispatch::Unsure
115 Dispatch::Unsure
118 } else if st_mtime == last_normal_time {
116 } else if st_mtime == options.last_normal_time {
119 // the file may have just been marked as normal and
117 // the file may have just been marked as normal and
120 // it may have changed in the same second without
118 // it may have changed in the same second without
121 // changing its size. This can happen if we quickly
119 // changing its size. This can happen if we quickly
122 // do multiple commits. Force lookup, so we don't
120 // do multiple commits. Force lookup, so we don't
123 // miss such a racy file change.
121 // miss such a racy file change.
124 Dispatch::Unsure
122 Dispatch::Unsure
125 } else if list_clean {
123 } else if options.list_clean {
126 Dispatch::Clean
124 Dispatch::Clean
127 } else {
125 } else {
128 Dispatch::Unknown
126 Dispatch::Unknown
129 }
127 }
130 }
128 }
131 EntryState::Merged => Dispatch::Modified,
129 EntryState::Merged => Dispatch::Modified,
132 EntryState::Added => Dispatch::Added,
130 EntryState::Added => Dispatch::Added,
133 EntryState::Removed => Dispatch::Removed,
131 EntryState::Removed => Dispatch::Removed,
134 EntryState::Unknown => Dispatch::Unknown,
132 EntryState::Unknown => Dispatch::Unknown,
135 }
133 }
136 }
134 }
137
135
138 /// The file corresponding to this Dirstate entry is missing.
136 /// The file corresponding to this Dirstate entry is missing.
139 fn dispatch_missing(state: EntryState) -> Dispatch {
137 fn dispatch_missing(state: EntryState) -> Dispatch {
140 match state {
138 match state {
141 // File was removed from the filesystem during commands
139 // File was removed from the filesystem during commands
142 EntryState::Normal | EntryState::Merged | EntryState::Added => {
140 EntryState::Normal | EntryState::Merged | EntryState::Added => {
143 Dispatch::Deleted
141 Dispatch::Deleted
144 }
142 }
145 // File was removed, everything is normal
143 // File was removed, everything is normal
146 EntryState::Removed => Dispatch::Removed,
144 EntryState::Removed => Dispatch::Removed,
147 // File is unknown to Mercurial, everything is normal
145 // File is unknown to Mercurial, everything is normal
148 EntryState::Unknown => Dispatch::Unknown,
146 EntryState::Unknown => Dispatch::Unknown,
149 }
147 }
150 }
148 }
151
149
152 /// Get stat data about the files explicitly specified by match.
150 /// Get stat data about the files explicitly specified by match.
153 /// TODO subrepos
151 /// TODO subrepos
154 fn walk_explicit<'a>(
152 fn walk_explicit<'a>(
155 files: &'a HashSet<&HgPath>,
153 files: &'a HashSet<&HgPath>,
156 dmap: &'a DirstateMap,
154 dmap: &'a DirstateMap,
157 root_dir: impl AsRef<Path> + Sync + Send,
155 root_dir: impl AsRef<Path> + Sync + Send,
158 check_exec: bool,
156 options: StatusOptions,
159 list_clean: bool,
160 last_normal_time: i64,
161 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
157 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
162 files.par_iter().filter_map(move |filename| {
158 files.par_iter().filter_map(move |filename| {
163 // TODO normalization
159 // TODO normalization
164 let normalized = filename.as_ref();
160 let normalized = filename.as_ref();
165
161
166 let buf = match hg_path_to_path_buf(normalized) {
162 let buf = match hg_path_to_path_buf(normalized) {
167 Ok(x) => x,
163 Ok(x) => x,
168 Err(e) => return Some(Err(e.into())),
164 Err(e) => return Some(Err(e.into())),
169 };
165 };
170 let target = root_dir.as_ref().join(buf);
166 let target = root_dir.as_ref().join(buf);
171 let st = target.symlink_metadata();
167 let st = target.symlink_metadata();
172 match st {
168 match st {
173 Ok(meta) => {
169 Ok(meta) => {
174 let file_type = meta.file_type();
170 let file_type = meta.file_type();
175 if file_type.is_file() || file_type.is_symlink() {
171 if file_type.is_file() || file_type.is_symlink() {
176 if let Some(entry) = dmap.get(normalized) {
172 if let Some(entry) = dmap.get(normalized) {
177 return Some(Ok((
173 return Some(Ok((
178 normalized,
174 normalized,
179 dispatch_found(
175 dispatch_found(
180 &normalized,
176 &normalized,
181 *entry,
177 *entry,
182 HgMetadata::from_metadata(meta),
178 HgMetadata::from_metadata(meta),
183 &dmap.copy_map,
179 &dmap.copy_map,
184 check_exec,
180 options,
185 list_clean,
186 last_normal_time,
187 ),
181 ),
188 )));
182 )));
189 }
183 }
190 } else {
184 } else {
191 if dmap.contains_key(normalized) {
185 if dmap.contains_key(normalized) {
192 return Some(Ok((normalized, Dispatch::Removed)));
186 return Some(Ok((normalized, Dispatch::Removed)));
193 }
187 }
194 }
188 }
195 }
189 }
196 Err(_) => {
190 Err(_) => {
197 if let Some(entry) = dmap.get(normalized) {
191 if let Some(entry) = dmap.get(normalized) {
198 return Some(Ok((
192 return Some(Ok((
199 normalized,
193 normalized,
200 dispatch_missing(entry.state),
194 dispatch_missing(entry.state),
201 )));
195 )));
202 }
196 }
203 }
197 }
204 };
198 };
205 None
199 None
206 })
200 })
207 }
201 }
208
202
203 #[derive(Debug, Copy, Clone)]
204 pub struct StatusOptions {
205 /// Remember the most recent modification timeslot for status, to make
206 /// sure we won't miss future size-preserving file content modifications
207 /// that happen within the same timeslot.
208 pub last_normal_time: i64,
209 /// Whether we are on a filesystem with UNIX-like exec flags
210 pub check_exec: bool,
211 pub list_clean: bool,
212 }
213
209 /// Stat all entries in the `DirstateMap` and mark them for dispatch into
214 /// Stat all entries in the `DirstateMap` and mark them for dispatch into
210 /// the relevant collections.
215 /// the relevant collections.
211 fn stat_dmap_entries(
216 fn stat_dmap_entries(
212 dmap: &DirstateMap,
217 dmap: &DirstateMap,
213 root_dir: impl AsRef<Path> + Sync + Send,
218 root_dir: impl AsRef<Path> + Sync + Send,
214 check_exec: bool,
219 options: StatusOptions,
215 list_clean: bool,
216 last_normal_time: i64,
217 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
220 ) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
218 dmap.par_iter().map(move |(filename, entry)| {
221 dmap.par_iter().map(move |(filename, entry)| {
219 let filename: &HgPath = filename;
222 let filename: &HgPath = filename;
220 let filename_as_path = hg_path_to_path_buf(filename)?;
223 let filename_as_path = hg_path_to_path_buf(filename)?;
221 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
224 let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
222
225
223 match meta {
226 match meta {
224 Ok(ref m)
227 Ok(ref m)
225 if !(m.file_type().is_file()
228 if !(m.file_type().is_file()
226 || m.file_type().is_symlink()) =>
229 || m.file_type().is_symlink()) =>
227 {
230 {
228 Ok((filename, dispatch_missing(entry.state)))
231 Ok((filename, dispatch_missing(entry.state)))
229 }
232 }
230 Ok(m) => Ok((
233 Ok(m) => Ok((
231 filename,
234 filename,
232 dispatch_found(
235 dispatch_found(
233 filename,
236 filename,
234 *entry,
237 *entry,
235 HgMetadata::from_metadata(m),
238 HgMetadata::from_metadata(m),
236 &dmap.copy_map,
239 &dmap.copy_map,
237 check_exec,
240 options,
238 list_clean,
239 last_normal_time,
240 ),
241 ),
241 )),
242 )),
242 Err(ref e)
243 Err(ref e)
243 if e.kind() == std::io::ErrorKind::NotFound
244 if e.kind() == std::io::ErrorKind::NotFound
244 || e.raw_os_error() == Some(20) =>
245 || e.raw_os_error() == Some(20) =>
245 {
246 {
246 // Rust does not yet have an `ErrorKind` for
247 // Rust does not yet have an `ErrorKind` for
247 // `NotADirectory` (errno 20)
248 // `NotADirectory` (errno 20)
248 // It happens if the dirstate contains `foo/bar` and
249 // It happens if the dirstate contains `foo/bar` and
249 // foo is not a directory
250 // foo is not a directory
250 Ok((filename, dispatch_missing(entry.state)))
251 Ok((filename, dispatch_missing(entry.state)))
251 }
252 }
252 Err(e) => Err(e),
253 Err(e) => Err(e),
253 }
254 }
254 })
255 })
255 }
256 }
256
257
257 pub struct StatusResult<'a> {
258 pub struct StatusResult<'a> {
258 pub modified: Vec<&'a HgPath>,
259 pub modified: Vec<&'a HgPath>,
259 pub added: Vec<&'a HgPath>,
260 pub added: Vec<&'a HgPath>,
260 pub removed: Vec<&'a HgPath>,
261 pub removed: Vec<&'a HgPath>,
261 pub deleted: Vec<&'a HgPath>,
262 pub deleted: Vec<&'a HgPath>,
262 pub clean: Vec<&'a HgPath>,
263 pub clean: Vec<&'a HgPath>,
263 /* TODO ignored
264 /* TODO ignored
264 * TODO unknown */
265 * TODO unknown */
265 }
266 }
266
267
267 fn build_response<'a>(
268 fn build_response<'a>(
268 results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
269 results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
269 ) -> IoResult<(Vec<&'a HgPath>, StatusResult<'a>)> {
270 ) -> IoResult<(Vec<&'a HgPath>, StatusResult<'a>)> {
270 let mut lookup = vec![];
271 let mut lookup = vec![];
271 let mut modified = vec![];
272 let mut modified = vec![];
272 let mut added = vec![];
273 let mut added = vec![];
273 let mut removed = vec![];
274 let mut removed = vec![];
274 let mut deleted = vec![];
275 let mut deleted = vec![];
275 let mut clean = vec![];
276 let mut clean = vec![];
276
277
277 for res in results.into_iter() {
278 for res in results.into_iter() {
278 let (filename, dispatch) = res?;
279 let (filename, dispatch) = res?;
279 match dispatch {
280 match dispatch {
280 Dispatch::Unknown => {}
281 Dispatch::Unknown => {}
281 Dispatch::Unsure => lookup.push(filename),
282 Dispatch::Unsure => lookup.push(filename),
282 Dispatch::Modified => modified.push(filename),
283 Dispatch::Modified => modified.push(filename),
283 Dispatch::Added => added.push(filename),
284 Dispatch::Added => added.push(filename),
284 Dispatch::Removed => removed.push(filename),
285 Dispatch::Removed => removed.push(filename),
285 Dispatch::Deleted => deleted.push(filename),
286 Dispatch::Deleted => deleted.push(filename),
286 Dispatch::Clean => clean.push(filename),
287 Dispatch::Clean => clean.push(filename),
287 }
288 }
288 }
289 }
289
290
290 Ok((
291 Ok((
291 lookup,
292 lookup,
292 StatusResult {
293 StatusResult {
293 modified,
294 modified,
294 added,
295 added,
295 removed,
296 removed,
296 deleted,
297 deleted,
297 clean,
298 clean,
298 },
299 },
299 ))
300 ))
300 }
301 }
301
302
302 pub fn status<'a: 'c, 'b: 'c, 'c>(
303 pub fn status<'a: 'c, 'b: 'c, 'c>(
303 dmap: &'a DirstateMap,
304 dmap: &'a DirstateMap,
304 matcher: &'b impl Matcher,
305 matcher: &'b impl Matcher,
305 root_dir: impl AsRef<Path> + Sync + Send + Copy,
306 root_dir: impl AsRef<Path> + Sync + Send + Copy,
306 list_clean: bool,
307 options: StatusOptions,
307 last_normal_time: i64,
308 check_exec: bool,
309 ) -> IoResult<(Vec<&'c HgPath>, StatusResult<'c>)> {
308 ) -> IoResult<(Vec<&'c HgPath>, StatusResult<'c>)> {
310 let files = matcher.file_set();
309 let files = matcher.file_set();
311 let mut results = vec![];
310 let mut results = vec![];
312 if let Some(files) = files {
311 if let Some(files) = files {
313 results.par_extend(walk_explicit(
312 results.par_extend(walk_explicit(&files, &dmap, root_dir, options));
314 &files,
315 &dmap,
316 root_dir,
317 check_exec,
318 list_clean,
319 last_normal_time,
320 ));
321 }
313 }
322
314
323 if !matcher.is_exact() {
315 if !matcher.is_exact() {
324 let stat_results = stat_dmap_entries(
316 let stat_results = stat_dmap_entries(&dmap, root_dir, options);
325 &dmap,
326 root_dir,
327 check_exec,
328 list_clean,
329 last_normal_time,
330 );
331 results.par_extend(stat_results);
317 results.par_extend(stat_results);
332 }
318 }
333
319
334 build_response(results)
320 build_response(results)
335 }
321 }
@@ -1,184 +1,184 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 mod ancestors;
6 mod ancestors;
7 pub mod dagops;
7 pub mod dagops;
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 mod dirstate;
9 mod dirstate;
10 pub mod discovery;
10 pub mod discovery;
11 pub mod testing; // unconditionally built, for use from integration tests
11 pub mod testing; // unconditionally built, for use from integration tests
12 pub use dirstate::{
12 pub use dirstate::{
13 dirs_multiset::{DirsMultiset, DirsMultisetIter},
13 dirs_multiset::{DirsMultiset, DirsMultisetIter},
14 dirstate_map::DirstateMap,
14 dirstate_map::DirstateMap,
15 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
15 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
16 status::{status, StatusResult},
16 status::{status, StatusOptions, StatusResult},
17 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
17 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
18 StateMap, StateMapIter,
18 StateMap, StateMapIter,
19 };
19 };
20 mod filepatterns;
20 mod filepatterns;
21 pub mod matchers;
21 pub mod matchers;
22 pub mod revlog;
22 pub mod revlog;
23 pub use revlog::*;
23 pub use revlog::*;
24 #[cfg(feature = "with-re2")]
24 #[cfg(feature = "with-re2")]
25 pub mod re2;
25 pub mod re2;
26 pub mod utils;
26 pub mod utils;
27
27
28 use crate::utils::hg_path::{HgPathBuf, HgPathError};
28 use crate::utils::hg_path::{HgPathBuf, HgPathError};
29 pub use filepatterns::{
29 pub use filepatterns::{
30 parse_pattern_syntax, read_pattern_file, IgnorePattern,
30 parse_pattern_syntax, read_pattern_file, IgnorePattern,
31 PatternFileWarning, PatternSyntax,
31 PatternFileWarning, PatternSyntax,
32 };
32 };
33 use std::collections::HashMap;
33 use std::collections::HashMap;
34 use twox_hash::RandomXxHashBuilder64;
34 use twox_hash::RandomXxHashBuilder64;
35
35
36 pub type LineNumber = usize;
36 pub type LineNumber = usize;
37
37
38 /// Rust's default hasher is too slow because it tries to prevent collision
38 /// Rust's default hasher is too slow because it tries to prevent collision
39 /// attacks. We are not concerned about those: if an ill-minded person has
39 /// attacks. We are not concerned about those: if an ill-minded person has
40 /// write access to your repository, you have other issues.
40 /// write access to your repository, you have other issues.
41 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
41 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
42
42
43 #[derive(Clone, Debug, PartialEq)]
43 #[derive(Clone, Debug, PartialEq)]
44 pub enum DirstateParseError {
44 pub enum DirstateParseError {
45 TooLittleData,
45 TooLittleData,
46 Overflow,
46 Overflow,
47 CorruptedEntry(String),
47 CorruptedEntry(String),
48 Damaged,
48 Damaged,
49 }
49 }
50
50
51 impl From<std::io::Error> for DirstateParseError {
51 impl From<std::io::Error> for DirstateParseError {
52 fn from(e: std::io::Error) -> Self {
52 fn from(e: std::io::Error) -> Self {
53 DirstateParseError::CorruptedEntry(e.to_string())
53 DirstateParseError::CorruptedEntry(e.to_string())
54 }
54 }
55 }
55 }
56
56
57 impl ToString for DirstateParseError {
57 impl ToString for DirstateParseError {
58 fn to_string(&self) -> String {
58 fn to_string(&self) -> String {
59 use crate::DirstateParseError::*;
59 use crate::DirstateParseError::*;
60 match self {
60 match self {
61 TooLittleData => "Too little data for dirstate.".to_string(),
61 TooLittleData => "Too little data for dirstate.".to_string(),
62 Overflow => "Overflow in dirstate.".to_string(),
62 Overflow => "Overflow in dirstate.".to_string(),
63 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
63 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
64 Damaged => "Dirstate appears to be damaged.".to_string(),
64 Damaged => "Dirstate appears to be damaged.".to_string(),
65 }
65 }
66 }
66 }
67 }
67 }
68
68
69 #[derive(Debug, PartialEq)]
69 #[derive(Debug, PartialEq)]
70 pub enum DirstatePackError {
70 pub enum DirstatePackError {
71 CorruptedEntry(String),
71 CorruptedEntry(String),
72 CorruptedParent,
72 CorruptedParent,
73 BadSize(usize, usize),
73 BadSize(usize, usize),
74 }
74 }
75
75
76 impl From<std::io::Error> for DirstatePackError {
76 impl From<std::io::Error> for DirstatePackError {
77 fn from(e: std::io::Error) -> Self {
77 fn from(e: std::io::Error) -> Self {
78 DirstatePackError::CorruptedEntry(e.to_string())
78 DirstatePackError::CorruptedEntry(e.to_string())
79 }
79 }
80 }
80 }
81 #[derive(Debug, PartialEq)]
81 #[derive(Debug, PartialEq)]
82 pub enum DirstateMapError {
82 pub enum DirstateMapError {
83 PathNotFound(HgPathBuf),
83 PathNotFound(HgPathBuf),
84 EmptyPath,
84 EmptyPath,
85 InvalidPath(HgPathError),
85 InvalidPath(HgPathError),
86 }
86 }
87
87
88 impl ToString for DirstateMapError {
88 impl ToString for DirstateMapError {
89 fn to_string(&self) -> String {
89 fn to_string(&self) -> String {
90 match self {
90 match self {
91 DirstateMapError::PathNotFound(_) => {
91 DirstateMapError::PathNotFound(_) => {
92 "expected a value, found none".to_string()
92 "expected a value, found none".to_string()
93 }
93 }
94 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
94 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
95 DirstateMapError::InvalidPath(e) => e.to_string(),
95 DirstateMapError::InvalidPath(e) => e.to_string(),
96 }
96 }
97 }
97 }
98 }
98 }
99
99
100 pub enum DirstateError {
100 pub enum DirstateError {
101 Parse(DirstateParseError),
101 Parse(DirstateParseError),
102 Pack(DirstatePackError),
102 Pack(DirstatePackError),
103 Map(DirstateMapError),
103 Map(DirstateMapError),
104 IO(std::io::Error),
104 IO(std::io::Error),
105 }
105 }
106
106
107 impl From<DirstateParseError> for DirstateError {
107 impl From<DirstateParseError> for DirstateError {
108 fn from(e: DirstateParseError) -> Self {
108 fn from(e: DirstateParseError) -> Self {
109 DirstateError::Parse(e)
109 DirstateError::Parse(e)
110 }
110 }
111 }
111 }
112
112
113 impl From<DirstatePackError> for DirstateError {
113 impl From<DirstatePackError> for DirstateError {
114 fn from(e: DirstatePackError) -> Self {
114 fn from(e: DirstatePackError) -> Self {
115 DirstateError::Pack(e)
115 DirstateError::Pack(e)
116 }
116 }
117 }
117 }
118
118
119 #[derive(Debug)]
119 #[derive(Debug)]
120 pub enum PatternError {
120 pub enum PatternError {
121 Path(HgPathError),
121 Path(HgPathError),
122 UnsupportedSyntax(String),
122 UnsupportedSyntax(String),
123 UnsupportedSyntaxInFile(String, String, usize),
123 UnsupportedSyntaxInFile(String, String, usize),
124 TooLong(usize),
124 TooLong(usize),
125 IO(std::io::Error),
125 IO(std::io::Error),
126 /// Needed a pattern that can be turned into a regex but got one that
126 /// Needed a pattern that can be turned into a regex but got one that
127 /// can't. This should only happen through programmer error.
127 /// can't. This should only happen through programmer error.
128 NonRegexPattern(IgnorePattern),
128 NonRegexPattern(IgnorePattern),
129 /// This is temporary, see `re2/mod.rs`.
129 /// This is temporary, see `re2/mod.rs`.
130 /// This will cause a fallback to Python.
130 /// This will cause a fallback to Python.
131 Re2NotInstalled,
131 Re2NotInstalled,
132 }
132 }
133
133
134 impl ToString for PatternError {
134 impl ToString for PatternError {
135 fn to_string(&self) -> String {
135 fn to_string(&self) -> String {
136 match self {
136 match self {
137 PatternError::UnsupportedSyntax(syntax) => {
137 PatternError::UnsupportedSyntax(syntax) => {
138 format!("Unsupported syntax {}", syntax)
138 format!("Unsupported syntax {}", syntax)
139 }
139 }
140 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
140 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
141 format!(
141 format!(
142 "{}:{}: unsupported syntax {}",
142 "{}:{}: unsupported syntax {}",
143 file_path, line, syntax
143 file_path, line, syntax
144 )
144 )
145 }
145 }
146 PatternError::TooLong(size) => {
146 PatternError::TooLong(size) => {
147 format!("matcher pattern is too long ({} bytes)", size)
147 format!("matcher pattern is too long ({} bytes)", size)
148 }
148 }
149 PatternError::IO(e) => e.to_string(),
149 PatternError::IO(e) => e.to_string(),
150 PatternError::Path(e) => e.to_string(),
150 PatternError::Path(e) => e.to_string(),
151 PatternError::NonRegexPattern(pattern) => {
151 PatternError::NonRegexPattern(pattern) => {
152 format!("'{:?}' cannot be turned into a regex", pattern)
152 format!("'{:?}' cannot be turned into a regex", pattern)
153 }
153 }
154 PatternError::Re2NotInstalled => {
154 PatternError::Re2NotInstalled => {
155 "Re2 is not installed, cannot use regex functionality."
155 "Re2 is not installed, cannot use regex functionality."
156 .to_string()
156 .to_string()
157 }
157 }
158 }
158 }
159 }
159 }
160 }
160 }
161
161
162 impl From<DirstateMapError> for DirstateError {
162 impl From<DirstateMapError> for DirstateError {
163 fn from(e: DirstateMapError) -> Self {
163 fn from(e: DirstateMapError) -> Self {
164 DirstateError::Map(e)
164 DirstateError::Map(e)
165 }
165 }
166 }
166 }
167
167
168 impl From<std::io::Error> for DirstateError {
168 impl From<std::io::Error> for DirstateError {
169 fn from(e: std::io::Error) -> Self {
169 fn from(e: std::io::Error) -> Self {
170 DirstateError::IO(e)
170 DirstateError::IO(e)
171 }
171 }
172 }
172 }
173
173
174 impl From<std::io::Error> for PatternError {
174 impl From<std::io::Error> for PatternError {
175 fn from(e: std::io::Error) -> Self {
175 fn from(e: std::io::Error) -> Self {
176 PatternError::IO(e)
176 PatternError::IO(e)
177 }
177 }
178 }
178 }
179
179
180 impl From<HgPathError> for PatternError {
180 impl From<HgPathError> for PatternError {
181 fn from(e: HgPathError) -> Self {
181 fn from(e: HgPathError) -> Self {
182 PatternError::Path(e)
182 PatternError::Path(e)
183 }
183 }
184 }
184 }
@@ -1,888 +1,892 b''
1 // matchers.rs
1 // matchers.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Structs and types for matching files and directories.
8 //! Structs and types for matching files and directories.
9
9
10 #[cfg(feature = "with-re2")]
10 #[cfg(feature = "with-re2")]
11 use crate::re2::Re2;
11 use crate::re2::Re2;
12 use crate::{
12 use crate::{
13 dirstate::dirs_multiset::DirsChildrenMultiset,
13 dirstate::dirs_multiset::DirsChildrenMultiset,
14 filepatterns::{
14 filepatterns::{
15 build_single_regex, filter_subincludes, get_patterns_from_file,
15 build_single_regex, filter_subincludes, get_patterns_from_file,
16 PatternFileWarning, PatternResult, SubInclude,
16 PatternFileWarning, PatternResult, SubInclude,
17 },
17 },
18 utils::{
18 utils::{
19 files::find_dirs,
19 files::find_dirs,
20 hg_path::{HgPath, HgPathBuf},
20 hg_path::{HgPath, HgPathBuf},
21 Escaped,
21 Escaped,
22 },
22 },
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
23 DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
24 PatternSyntax,
24 PatternSyntax,
25 };
25 };
26
26
27 use std::collections::HashSet;
27 use std::collections::HashSet;
28 use std::fmt::{Display, Error, Formatter};
28 use std::fmt::{Display, Error, Formatter};
29 use std::iter::FromIterator;
29 use std::iter::FromIterator;
30 use std::ops::Deref;
30 use std::ops::Deref;
31 use std::path::Path;
31 use std::path::Path;
32
32
33 #[derive(Debug, PartialEq)]
33 #[derive(Debug, PartialEq)]
34 pub enum VisitChildrenSet<'a> {
34 pub enum VisitChildrenSet<'a> {
35 /// Don't visit anything
35 /// Don't visit anything
36 Empty,
36 Empty,
37 /// Only visit this directory
37 /// Only visit this directory
38 This,
38 This,
39 /// Visit this directory and these subdirectories
39 /// Visit this directory and these subdirectories
40 /// TODO Should we implement a `NonEmptyHashSet`?
40 /// TODO Should we implement a `NonEmptyHashSet`?
41 Set(HashSet<&'a HgPath>),
41 Set(HashSet<&'a HgPath>),
42 /// Visit this directory and all subdirectories
42 /// Visit this directory and all subdirectories
43 Recursive,
43 Recursive,
44 }
44 }
45
45
46 pub trait Matcher {
46 pub trait Matcher {
47 /// Explicitly listed files
47 /// Explicitly listed files
48 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
48 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
49 /// Returns whether `filename` is in `file_set`
49 /// Returns whether `filename` is in `file_set`
50 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
50 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
51 /// Returns whether `filename` is matched by this matcher
51 /// Returns whether `filename` is matched by this matcher
52 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
52 fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
53 /// Decides whether a directory should be visited based on whether it
53 /// Decides whether a directory should be visited based on whether it
54 /// has potential matches in it or one of its subdirectories, and
54 /// has potential matches in it or one of its subdirectories, and
55 /// potentially lists which subdirectories of that directory should be
55 /// potentially lists which subdirectories of that directory should be
56 /// visited. This is based on the match's primary, included, and excluded
56 /// visited. This is based on the match's primary, included, and excluded
57 /// patterns.
57 /// patterns.
58 ///
58 ///
59 /// # Example
59 /// # Example
60 ///
60 ///
61 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
61 /// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
62 /// return the following values (assuming the implementation of
62 /// return the following values (assuming the implementation of
63 /// visit_children_set is capable of recognizing this; some implementations
63 /// visit_children_set is capable of recognizing this; some implementations
64 /// are not).
64 /// are not).
65 ///
65 ///
66 /// ```text
66 /// ```text
67 /// ```ignore
67 /// ```ignore
68 /// '' -> {'foo', 'qux'}
68 /// '' -> {'foo', 'qux'}
69 /// 'baz' -> set()
69 /// 'baz' -> set()
70 /// 'foo' -> {'bar'}
70 /// 'foo' -> {'bar'}
71 /// // Ideally this would be `Recursive`, but since the prefix nature of
71 /// // Ideally this would be `Recursive`, but since the prefix nature of
72 /// // matchers is applied to the entire matcher, we have to downgrade this
72 /// // matchers is applied to the entire matcher, we have to downgrade this
73 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
73 /// // to `This` due to the (yet to be implemented in Rust) non-prefix
74 /// // `RootFilesIn'-kind matcher being mixed in.
74 /// // `RootFilesIn'-kind matcher being mixed in.
75 /// 'foo/bar' -> 'this'
75 /// 'foo/bar' -> 'this'
76 /// 'qux' -> 'this'
76 /// 'qux' -> 'this'
77 /// ```
77 /// ```
78 /// # Important
78 /// # Important
79 ///
79 ///
80 /// Most matchers do not know if they're representing files or
80 /// Most matchers do not know if they're representing files or
81 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
81 /// directories. They see `['path:dir/f']` and don't know whether `f` is a
82 /// file or a directory, so `visit_children_set('dir')` for most matchers
82 /// file or a directory, so `visit_children_set('dir')` for most matchers
83 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
83 /// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
84 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
84 /// a file (like the yet to be implemented in Rust `ExactMatcher` does),
85 /// it may return `VisitChildrenSet::This`.
85 /// it may return `VisitChildrenSet::This`.
86 /// Do not rely on the return being a `HashSet` indicating that there are
86 /// Do not rely on the return being a `HashSet` indicating that there are
87 /// no files in this dir to investigate (or equivalently that if there are
87 /// no files in this dir to investigate (or equivalently that if there are
88 /// files to investigate in 'dir' that it will always return
88 /// files to investigate in 'dir' that it will always return
89 /// `VisitChildrenSet::This`).
89 /// `VisitChildrenSet::This`).
90 fn visit_children_set(
90 fn visit_children_set(
91 &self,
91 &self,
92 directory: impl AsRef<HgPath>,
92 directory: impl AsRef<HgPath>,
93 ) -> VisitChildrenSet;
93 ) -> VisitChildrenSet;
94 /// Matcher will match everything and `files_set()` will be empty:
94 /// Matcher will match everything and `files_set()` will be empty:
95 /// optimization might be possible.
95 /// optimization might be possible.
96 fn matches_everything(&self) -> bool;
96 fn matches_everything(&self) -> bool;
97 /// Matcher will match exactly the files in `files_set()`: optimization
97 /// Matcher will match exactly the files in `files_set()`: optimization
98 /// might be possible.
98 /// might be possible.
99 fn is_exact(&self) -> bool;
99 fn is_exact(&self) -> bool;
100 }
100 }
101
101
/// A matcher that accepts every path it is asked about.
///
/// ```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
115
115
116 impl Matcher for AlwaysMatcher {
116 impl Matcher for AlwaysMatcher {
117 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
117 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
118 None
118 None
119 }
119 }
120 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
120 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
121 false
121 false
122 }
122 }
123 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
123 fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
124 true
124 true
125 }
125 }
126 fn visit_children_set(
126 fn visit_children_set(
127 &self,
127 &self,
128 _directory: impl AsRef<HgPath>,
128 _directory: impl AsRef<HgPath>,
129 ) -> VisitChildrenSet {
129 ) -> VisitChildrenSet {
130 VisitChildrenSet::Recursive
130 VisitChildrenSet::Recursive
131 }
131 }
132 fn matches_everything(&self) -> bool {
132 fn matches_everything(&self) -> bool {
133 true
133 true
134 }
134 }
135 fn is_exact(&self) -> bool {
135 fn is_exact(&self) -> bool {
136 false
136 false
137 }
137 }
138 }
138 }
139
139
140 /// Matches the input files exactly. They are interpreted as paths, not
140 /// Matches the input files exactly. They are interpreted as paths, not
141 /// patterns.
141 /// patterns.
142 ///
142 ///
143 ///```
143 ///```
144 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
144 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
145 ///
145 ///
146 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
146 /// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
147 /// let matcher = FileMatcher::new(&files).unwrap();
147 /// let matcher = FileMatcher::new(&files).unwrap();
148 ///
148 ///
149 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
149 /// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
150 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
150 /// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
151 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
151 /// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
152 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
152 /// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
153 /// ```
153 /// ```
154 #[derive(Debug)]
154 #[derive(Debug)]
155 pub struct FileMatcher<'a> {
155 pub struct FileMatcher<'a> {
156 files: HashSet<&'a HgPath>,
156 files: HashSet<&'a HgPath>,
157 dirs: DirsMultiset,
157 dirs: DirsMultiset,
158 }
158 }
159
159
160 impl<'a> FileMatcher<'a> {
160 impl<'a> FileMatcher<'a> {
161 pub fn new(
161 pub fn new(
162 files: &'a [impl AsRef<HgPath>],
162 files: &'a [impl AsRef<HgPath>],
163 ) -> Result<Self, DirstateMapError> {
163 ) -> Result<Self, DirstateMapError> {
164 Ok(Self {
164 Ok(Self {
165 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
165 files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
166 dirs: DirsMultiset::from_manifest(files)?,
166 dirs: DirsMultiset::from_manifest(files)?,
167 })
167 })
168 }
168 }
169 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
169 fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
170 self.files.contains(filename.as_ref())
170 self.files.contains(filename.as_ref())
171 }
171 }
172 }
172 }
173
173
174 impl<'a> Matcher for FileMatcher<'a> {
174 impl<'a> Matcher for FileMatcher<'a> {
175 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
175 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
176 Some(&self.files)
176 Some(&self.files)
177 }
177 }
178 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
178 fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
179 self.inner_matches(filename)
179 self.inner_matches(filename)
180 }
180 }
181 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
181 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
182 self.inner_matches(filename)
182 self.inner_matches(filename)
183 }
183 }
184 fn visit_children_set(
184 fn visit_children_set(
185 &self,
185 &self,
186 directory: impl AsRef<HgPath>,
186 directory: impl AsRef<HgPath>,
187 ) -> VisitChildrenSet {
187 ) -> VisitChildrenSet {
188 if self.files.is_empty() || !self.dirs.contains(&directory) {
188 if self.files.is_empty() || !self.dirs.contains(&directory) {
189 return VisitChildrenSet::Empty;
189 return VisitChildrenSet::Empty;
190 }
190 }
191 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
191 let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
192
192
193 let mut candidates: HashSet<&HgPath> =
193 let mut candidates: HashSet<&HgPath> =
194 self.files.union(&dirs_as_set).map(|k| *k).collect();
194 self.files.union(&dirs_as_set).map(|k| *k).collect();
195 candidates.remove(HgPath::new(b""));
195 candidates.remove(HgPath::new(b""));
196
196
197 if !directory.as_ref().is_empty() {
197 if !directory.as_ref().is_empty() {
198 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
198 let directory = [directory.as_ref().as_bytes(), b"/"].concat();
199 candidates = candidates
199 candidates = candidates
200 .iter()
200 .iter()
201 .filter_map(|c| {
201 .filter_map(|c| {
202 if c.as_bytes().starts_with(&directory) {
202 if c.as_bytes().starts_with(&directory) {
203 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
203 Some(HgPath::new(&c.as_bytes()[directory.len()..]))
204 } else {
204 } else {
205 None
205 None
206 }
206 }
207 })
207 })
208 .collect();
208 .collect();
209 }
209 }
210
210
211 // `self.dirs` includes all of the directories, recursively, so if
211 // `self.dirs` includes all of the directories, recursively, so if
212 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
212 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
213 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
213 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
214 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
214 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
215 // subdir will be in there without a slash.
215 // subdir will be in there without a slash.
216 VisitChildrenSet::Set(
216 VisitChildrenSet::Set(
217 candidates
217 candidates
218 .iter()
218 .iter()
219 .filter_map(|c| {
219 .filter_map(|c| {
220 if c.bytes().all(|b| *b != b'/') {
220 if c.bytes().all(|b| *b != b'/') {
221 Some(*c)
221 Some(*c)
222 } else {
222 } else {
223 None
223 None
224 }
224 }
225 })
225 })
226 .collect(),
226 .collect(),
227 )
227 )
228 }
228 }
229 fn matches_everything(&self) -> bool {
229 fn matches_everything(&self) -> bool {
230 false
230 false
231 }
231 }
232 fn is_exact(&self) -> bool {
232 fn is_exact(&self) -> bool {
233 true
233 true
234 }
234 }
235 }
235 }
236
236
237 /// Matches files that are included in the ignore rules.
237 /// Matches files that are included in the ignore rules.
238 #[cfg_attr(
238 #[cfg_attr(
239 feature = "with-re2",
239 feature = "with-re2",
240 doc = r##"
240 doc = r##"
241 ```
241 ```
242 use hg::{
242 use hg::{
243 matchers::{IncludeMatcher, Matcher},
243 matchers::{IncludeMatcher, Matcher},
244 IgnorePattern,
244 IgnorePattern,
245 PatternSyntax,
245 PatternSyntax,
246 utils::hg_path::HgPath
246 utils::hg_path::HgPath
247 };
247 };
248 use std::path::Path;
248 use std::path::Path;
249 ///
249 ///
250 let ignore_patterns =
250 let ignore_patterns =
251 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
251 vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
252 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
252 let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
253 ///
253 ///
254 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
254 assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
255 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
255 assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
256 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
256 assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
257 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
257 assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
258 ```
258 ```
259 "##
259 "##
260 )]
260 )]
261 pub struct IncludeMatcher<'a> {
261 pub struct IncludeMatcher<'a> {
262 patterns: Vec<u8>,
262 patterns: Vec<u8>,
263 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
263 match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
264 /// Whether all the patterns match a prefix (i.e. recursively)
264 /// Whether all the patterns match a prefix (i.e. recursively)
265 prefix: bool,
265 prefix: bool,
266 roots: HashSet<HgPathBuf>,
266 roots: HashSet<HgPathBuf>,
267 dirs: HashSet<HgPathBuf>,
267 dirs: HashSet<HgPathBuf>,
268 parents: HashSet<HgPathBuf>,
268 parents: HashSet<HgPathBuf>,
269 }
269 }
270
270
271 impl<'a> Matcher for IncludeMatcher<'a> {
271 impl<'a> Matcher for IncludeMatcher<'a> {
272 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
272 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
273 None
273 None
274 }
274 }
275
275
276 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
276 fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
277 false
277 false
278 }
278 }
279
279
280 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
280 fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
281 (self.match_fn)(filename.as_ref())
281 (self.match_fn)(filename.as_ref())
282 }
282 }
283
283
284 fn visit_children_set(
284 fn visit_children_set(
285 &self,
285 &self,
286 directory: impl AsRef<HgPath>,
286 directory: impl AsRef<HgPath>,
287 ) -> VisitChildrenSet {
287 ) -> VisitChildrenSet {
288 let dir = directory.as_ref();
288 let dir = directory.as_ref();
289 if self.prefix && self.roots.contains(dir) {
289 if self.prefix && self.roots.contains(dir) {
290 return VisitChildrenSet::Recursive;
290 return VisitChildrenSet::Recursive;
291 }
291 }
292 if self.roots.contains(HgPath::new(b""))
292 if self.roots.contains(HgPath::new(b""))
293 || self.roots.contains(dir)
293 || self.roots.contains(dir)
294 || self.dirs.contains(dir)
294 || self.dirs.contains(dir)
295 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
295 || find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
296 {
296 {
297 return VisitChildrenSet::This;
297 return VisitChildrenSet::This;
298 }
298 }
299
299
300 if self.parents.contains(directory.as_ref()) {
300 if self.parents.contains(directory.as_ref()) {
301 let multiset = self.get_all_parents_children();
301 let multiset = self.get_all_parents_children();
302 if let Some(children) = multiset.get(dir) {
302 if let Some(children) = multiset.get(dir) {
303 return VisitChildrenSet::Set(children.to_owned());
303 return VisitChildrenSet::Set(children.to_owned());
304 }
304 }
305 }
305 }
306 VisitChildrenSet::Empty
306 VisitChildrenSet::Empty
307 }
307 }
308
308
309 fn matches_everything(&self) -> bool {
309 fn matches_everything(&self) -> bool {
310 false
310 false
311 }
311 }
312
312
313 fn is_exact(&self) -> bool {
313 fn is_exact(&self) -> bool {
314 false
314 false
315 }
315 }
316 }
316 }
317
317
#[cfg(feature = "with-re2")]
/// Compiles `pattern` with `Re2` and returns a closure testing whether the
/// bytes of an `HgPath` match it.
///
/// Fails with `PatternError::UnsupportedSyntax` when the pattern is invalid
/// or not supported by the underlying engine `Re2`, for instance anything
/// with back-references.
fn re_matcher(
    pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
    let compiled =
        Re2::new(pattern).map_err(PatternError::UnsupportedSyntax)?;
    Ok(move |path: &HgPath| compiled.is_match(path.as_bytes()))
}
331
331
332 #[cfg(not(feature = "with-re2"))]
332 #[cfg(not(feature = "with-re2"))]
333 fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> {
333 fn re_matcher(_: &[u8]) -> PatternResult<Box<dyn Fn(&HgPath) -> bool + Sync>> {
334 Err(PatternError::Re2NotInstalled)
334 Err(PatternError::Re2NotInstalled)
335 }
335 }
336
336
337 /// Returns the regex pattern and a function that matches an `HgPath` against
337 /// Returns the regex pattern and a function that matches an `HgPath` against
338 /// said regex formed by the given ignore patterns.
338 /// said regex formed by the given ignore patterns.
339 fn build_regex_match<'a>(
339 fn build_regex_match<'a>(
340 ignore_patterns: &'a [&'a IgnorePattern],
340 ignore_patterns: &'a [&'a IgnorePattern],
341 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
341 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
342 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
342 let regexps: Result<Vec<_>, PatternError> = ignore_patterns
343 .into_iter()
343 .into_iter()
344 .map(|k| build_single_regex(*k))
344 .map(|k| build_single_regex(*k))
345 .collect();
345 .collect();
346 let regexps = regexps?;
346 let regexps = regexps?;
347 let full_regex = regexps.join(&b'|');
347 let full_regex = regexps.join(&b'|');
348
348
349 let matcher = re_matcher(&full_regex)?;
349 let matcher = re_matcher(&full_regex)?;
350 let func = Box::new(move |filename: &HgPath| matcher(filename));
350 let func = Box::new(move |filename: &HgPath| matcher(filename));
351
351
352 Ok((full_regex, func))
352 Ok((full_regex, func))
353 }
353 }
354
354
355 /// Returns roots and directories corresponding to each pattern.
355 /// Returns roots and directories corresponding to each pattern.
356 ///
356 ///
357 /// This calculates the roots and directories exactly matching the patterns and
357 /// This calculates the roots and directories exactly matching the patterns and
358 /// returns a tuple of (roots, dirs). It does not return other directories
358 /// returns a tuple of (roots, dirs). It does not return other directories
359 /// which may also need to be considered, like the parent directories.
359 /// which may also need to be considered, like the parent directories.
360 fn roots_and_dirs(
360 fn roots_and_dirs(
361 ignore_patterns: &[IgnorePattern],
361 ignore_patterns: &[IgnorePattern],
362 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
362 ) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
363 let mut roots = Vec::new();
363 let mut roots = Vec::new();
364 let mut dirs = Vec::new();
364 let mut dirs = Vec::new();
365
365
366 for ignore_pattern in ignore_patterns {
366 for ignore_pattern in ignore_patterns {
367 let IgnorePattern {
367 let IgnorePattern {
368 syntax, pattern, ..
368 syntax, pattern, ..
369 } = ignore_pattern;
369 } = ignore_pattern;
370 match syntax {
370 match syntax {
371 PatternSyntax::RootGlob | PatternSyntax::Glob => {
371 PatternSyntax::RootGlob | PatternSyntax::Glob => {
372 let mut root = vec![];
372 let mut root = vec![];
373
373
374 for p in pattern.split(|c| *c == b'/') {
374 for p in pattern.split(|c| *c == b'/') {
375 if p.iter().any(|c| match *c {
375 if p.iter().any(|c| match *c {
376 b'[' | b'{' | b'*' | b'?' => true,
376 b'[' | b'{' | b'*' | b'?' => true,
377 _ => false,
377 _ => false,
378 }) {
378 }) {
379 break;
379 break;
380 }
380 }
381 root.push(HgPathBuf::from_bytes(p));
381 root.push(HgPathBuf::from_bytes(p));
382 }
382 }
383 let buf =
383 let buf =
384 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
384 root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
385 roots.push(buf);
385 roots.push(buf);
386 }
386 }
387 PatternSyntax::Path | PatternSyntax::RelPath => {
387 PatternSyntax::Path | PatternSyntax::RelPath => {
388 let pat = HgPath::new(if pattern == b"." {
388 let pat = HgPath::new(if pattern == b"." {
389 &[] as &[u8]
389 &[] as &[u8]
390 } else {
390 } else {
391 pattern
391 pattern
392 });
392 });
393 roots.push(pat.to_owned());
393 roots.push(pat.to_owned());
394 }
394 }
395 PatternSyntax::RootFiles => {
395 PatternSyntax::RootFiles => {
396 let pat = if pattern == b"." {
396 let pat = if pattern == b"." {
397 &[] as &[u8]
397 &[] as &[u8]
398 } else {
398 } else {
399 pattern
399 pattern
400 };
400 };
401 dirs.push(HgPathBuf::from_bytes(pat));
401 dirs.push(HgPathBuf::from_bytes(pat));
402 }
402 }
403 _ => {
403 _ => {
404 roots.push(HgPathBuf::new());
404 roots.push(HgPathBuf::new());
405 }
405 }
406 }
406 }
407 }
407 }
408 (roots, dirs)
408 (roots, dirs)
409 }
409 }
410
410
411 /// Paths extracted from patterns
411 /// Paths extracted from patterns
412 #[derive(Debug, PartialEq)]
412 #[derive(Debug, PartialEq)]
413 struct RootsDirsAndParents {
413 struct RootsDirsAndParents {
414 /// Directories to match recursively
414 /// Directories to match recursively
415 pub roots: HashSet<HgPathBuf>,
415 pub roots: HashSet<HgPathBuf>,
416 /// Directories to match non-recursively
416 /// Directories to match non-recursively
417 pub dirs: HashSet<HgPathBuf>,
417 pub dirs: HashSet<HgPathBuf>,
418 /// Implicitly required directories to go to items in either roots or dirs
418 /// Implicitly required directories to go to items in either roots or dirs
419 pub parents: HashSet<HgPathBuf>,
419 pub parents: HashSet<HgPathBuf>,
420 }
420 }
421
421
422 /// Extract roots, dirs and parents from patterns.
422 /// Extract roots, dirs and parents from patterns.
423 fn roots_dirs_and_parents(
423 fn roots_dirs_and_parents(
424 ignore_patterns: &[IgnorePattern],
424 ignore_patterns: &[IgnorePattern],
425 ) -> PatternResult<RootsDirsAndParents> {
425 ) -> PatternResult<RootsDirsAndParents> {
426 let (roots, dirs) = roots_and_dirs(ignore_patterns);
426 let (roots, dirs) = roots_and_dirs(ignore_patterns);
427
427
428 let mut parents = HashSet::new();
428 let mut parents = HashSet::new();
429
429
430 parents.extend(
430 parents.extend(
431 DirsMultiset::from_manifest(&dirs)
431 DirsMultiset::from_manifest(&dirs)
432 .map_err(|e| match e {
432 .map_err(|e| match e {
433 DirstateMapError::InvalidPath(e) => e,
433 DirstateMapError::InvalidPath(e) => e,
434 _ => unreachable!(),
434 _ => unreachable!(),
435 })?
435 })?
436 .iter()
436 .iter()
437 .map(|k| k.to_owned()),
437 .map(|k| k.to_owned()),
438 );
438 );
439 parents.extend(
439 parents.extend(
440 DirsMultiset::from_manifest(&roots)
440 DirsMultiset::from_manifest(&roots)
441 .map_err(|e| match e {
441 .map_err(|e| match e {
442 DirstateMapError::InvalidPath(e) => e,
442 DirstateMapError::InvalidPath(e) => e,
443 _ => unreachable!(),
443 _ => unreachable!(),
444 })?
444 })?
445 .iter()
445 .iter()
446 .map(|k| k.to_owned()),
446 .map(|k| k.to_owned()),
447 );
447 );
448
448
449 Ok(RootsDirsAndParents {
449 Ok(RootsDirsAndParents {
450 roots: HashSet::from_iter(roots),
450 roots: HashSet::from_iter(roots),
451 dirs: HashSet::from_iter(dirs),
451 dirs: HashSet::from_iter(dirs),
452 parents,
452 parents,
453 })
453 })
454 }
454 }
455
455
456 /// Returns a function that checks whether a given file (in the general sense)
456 /// Returns a function that checks whether a given file (in the general sense)
457 /// should be matched.
457 /// should be matched.
458 fn build_match<'a, 'b>(
458 fn build_match<'a, 'b>(
459 ignore_patterns: &'a [IgnorePattern],
459 ignore_patterns: &'a [IgnorePattern],
460 root_dir: impl AsRef<Path>,
460 root_dir: impl AsRef<Path>,
461 ) -> PatternResult<(
461 ) -> PatternResult<(
462 Vec<u8>,
462 Vec<u8>,
463 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
463 Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
464 Vec<PatternFileWarning>,
464 Vec<PatternFileWarning>,
465 )> {
465 )> {
466 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
466 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
467 // For debugging and printing
467 // For debugging and printing
468 let mut patterns = vec![];
468 let mut patterns = vec![];
469 let mut all_warnings = vec![];
469 let mut all_warnings = vec![];
470
470
471 let (subincludes, ignore_patterns) =
471 let (subincludes, ignore_patterns) =
472 filter_subincludes(ignore_patterns, root_dir)?;
472 filter_subincludes(ignore_patterns, root_dir)?;
473
473
474 if !subincludes.is_empty() {
474 if !subincludes.is_empty() {
475 // Build prefix-based matcher functions for subincludes
475 // Build prefix-based matcher functions for subincludes
476 let mut submatchers = FastHashMap::default();
476 let mut submatchers = FastHashMap::default();
477 let mut prefixes = vec![];
477 let mut prefixes = vec![];
478
478
479 for SubInclude { prefix, root, path } in subincludes.into_iter() {
479 for SubInclude { prefix, root, path } in subincludes.into_iter() {
480 let (match_fn, warnings) = get_ignore_function(&[path], root)?;
480 let (match_fn, warnings) = get_ignore_function(&[path], root)?;
481 all_warnings.extend(warnings);
481 all_warnings.extend(warnings);
482 prefixes.push(prefix.to_owned());
482 prefixes.push(prefix.to_owned());
483 submatchers.insert(prefix.to_owned(), match_fn);
483 submatchers.insert(prefix.to_owned(), match_fn);
484 }
484 }
485
485
486 let match_subinclude = move |filename: &HgPath| {
486 let match_subinclude = move |filename: &HgPath| {
487 for prefix in prefixes.iter() {
487 for prefix in prefixes.iter() {
488 if let Some(rel) = filename.relative_to(prefix) {
488 if let Some(rel) = filename.relative_to(prefix) {
489 if (submatchers.get(prefix).unwrap())(rel) {
489 if (submatchers.get(prefix).unwrap())(rel) {
490 return true;
490 return true;
491 }
491 }
492 }
492 }
493 }
493 }
494 false
494 false
495 };
495 };
496
496
497 match_funcs.push(Box::new(match_subinclude));
497 match_funcs.push(Box::new(match_subinclude));
498 }
498 }
499
499
500 if !ignore_patterns.is_empty() {
500 if !ignore_patterns.is_empty() {
501 // Either do dumb matching if all patterns are rootfiles, or match
501 // Either do dumb matching if all patterns are rootfiles, or match
502 // with a regex.
502 // with a regex.
503 if ignore_patterns
503 if ignore_patterns
504 .iter()
504 .iter()
505 .all(|k| k.syntax == PatternSyntax::RootFiles)
505 .all(|k| k.syntax == PatternSyntax::RootFiles)
506 {
506 {
507 let dirs: HashSet<_> = ignore_patterns
507 let dirs: HashSet<_> = ignore_patterns
508 .iter()
508 .iter()
509 .map(|k| k.pattern.to_owned())
509 .map(|k| k.pattern.to_owned())
510 .collect();
510 .collect();
511 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
511 let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
512
512
513 let match_func = move |path: &HgPath| -> bool {
513 let match_func = move |path: &HgPath| -> bool {
514 let path = path.as_bytes();
514 let path = path.as_bytes();
515 let i = path.iter().rfind(|a| **a == b'/');
515 let i = path.iter().rfind(|a| **a == b'/');
516 let dir = if let Some(i) = i {
516 let dir = if let Some(i) = i {
517 &path[..*i as usize]
517 &path[..*i as usize]
518 } else {
518 } else {
519 b"."
519 b"."
520 };
520 };
521 dirs.contains(dir.deref())
521 dirs.contains(dir.deref())
522 };
522 };
523 match_funcs.push(Box::new(match_func));
523 match_funcs.push(Box::new(match_func));
524
524
525 patterns.extend(b"rootfilesin: ");
525 patterns.extend(b"rootfilesin: ");
526 dirs_vec.sort();
526 dirs_vec.sort();
527 patterns.extend(dirs_vec.escaped_bytes());
527 patterns.extend(dirs_vec.escaped_bytes());
528 } else {
528 } else {
529 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
529 let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
530 patterns = new_re;
530 patterns = new_re;
531 match_funcs.push(match_func)
531 match_funcs.push(match_func)
532 }
532 }
533 }
533 }
534
534
535 Ok(if match_funcs.len() == 1 {
535 Ok(if match_funcs.len() == 1 {
536 (patterns, match_funcs.remove(0), all_warnings)
536 (patterns, match_funcs.remove(0), all_warnings)
537 } else {
537 } else {
538 (
538 (
539 patterns,
539 patterns,
540 Box::new(move |f: &HgPath| -> bool {
540 Box::new(move |f: &HgPath| -> bool {
541 match_funcs.iter().any(|match_func| match_func(f))
541 match_funcs.iter().any(|match_func| match_func(f))
542 }),
542 }),
543 all_warnings,
543 all_warnings,
544 )
544 )
545 })
545 })
546 }
546 }
547
547
548 /// Parses all "ignore" files with their recursive includes and returns a
548 /// Parses all "ignore" files with their recursive includes and returns a
549 /// function that checks whether a given file (in the general sense) should be
549 /// function that checks whether a given file (in the general sense) should be
550 /// ignored.
550 /// ignored.
551 pub fn get_ignore_function<'a>(
551 pub fn get_ignore_function<'a>(
552 all_pattern_files: &[impl AsRef<Path>],
552 all_pattern_files: &[impl AsRef<Path>],
553 root_dir: impl AsRef<Path>,
553 root_dir: impl AsRef<Path>,
554 ) -> PatternResult<(
554 ) -> PatternResult<(
555 impl for<'r> Fn(&'r HgPath) -> bool + Sync,
555 impl for<'r> Fn(&'r HgPath) -> bool + Sync,
556 Vec<PatternFileWarning>,
556 Vec<PatternFileWarning>,
557 )> {
557 )> {
558 let mut all_patterns = vec![];
558 let mut all_patterns = vec![];
559 let mut all_warnings = vec![];
559 let mut all_warnings = vec![];
560
560
561 for pattern_file in all_pattern_files.into_iter() {
561 for pattern_file in all_pattern_files.into_iter() {
562 let (patterns, warnings) =
562 let (patterns, warnings) =
563 get_patterns_from_file(pattern_file, &root_dir)?;
563 get_patterns_from_file(pattern_file, &root_dir)?;
564
564
565 all_patterns.extend(patterns);
565 all_patterns.extend(patterns);
566 all_warnings.extend(warnings);
566 all_warnings.extend(warnings);
567 }
567 }
568 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
568 let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
569 all_warnings.extend(warnings);
569 all_warnings.extend(warnings);
570 Ok((move |path: &HgPath| matcher.matches(path), all_warnings))
570 Ok((move |path: &HgPath| matcher.matches(path), all_warnings))
571 }
571 }
572
572
573 impl<'a> IncludeMatcher<'a> {
573 impl<'a> IncludeMatcher<'a> {
574 pub fn new(
574 pub fn new(
575 ignore_patterns: Vec<IgnorePattern>,
575 ignore_patterns: Vec<IgnorePattern>,
576 root_dir: impl AsRef<Path>,
576 root_dir: impl AsRef<Path>,
577 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
577 ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
578 let (patterns, match_fn, warnings) =
578 let (patterns, match_fn, warnings) =
579 build_match(&ignore_patterns, root_dir)?;
579 build_match(&ignore_patterns, root_dir)?;
580 let RootsDirsAndParents {
580 let RootsDirsAndParents {
581 roots,
581 roots,
582 dirs,
582 dirs,
583 parents,
583 parents,
584 } = roots_dirs_and_parents(&ignore_patterns)?;
584 } = roots_dirs_and_parents(&ignore_patterns)?;
585
585
586 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
586 let prefix = ignore_patterns.iter().any(|k| match k.syntax {
587 PatternSyntax::Path | PatternSyntax::RelPath => true,
587 PatternSyntax::Path | PatternSyntax::RelPath => true,
588 _ => false,
588 _ => false,
589 });
589 });
590
590
591 Ok((
591 Ok((
592 Self {
592 Self {
593 patterns,
593 patterns,
594 match_fn,
594 match_fn,
595 prefix,
595 prefix,
596 roots,
596 roots,
597 dirs,
597 dirs,
598 parents,
598 parents,
599 },
599 },
600 warnings,
600 warnings,
601 ))
601 ))
602 }
602 }
603
603
604 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
604 fn get_all_parents_children(&self) -> DirsChildrenMultiset {
605 // TODO cache
605 // TODO cache
606 let thing = self
606 let thing = self
607 .dirs
607 .dirs
608 .iter()
608 .iter()
609 .chain(self.roots.iter())
609 .chain(self.roots.iter())
610 .chain(self.parents.iter());
610 .chain(self.parents.iter());
611 DirsChildrenMultiset::new(thing, Some(&self.parents))
611 DirsChildrenMultiset::new(thing, Some(&self.parents))
612 }
612 }
613 }
613 }
614
614
615 impl<'a> Display for IncludeMatcher<'a> {
615 impl<'a> Display for IncludeMatcher<'a> {
616 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
616 fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
617 write!(
617 write!(
618 f,
618 f,
619 "IncludeMatcher(includes='{}')",
619 "IncludeMatcher(includes='{}')",
620 String::from_utf8_lossy(&self.patterns.escaped_bytes())
620 String::from_utf8_lossy(&self.patterns.escaped_bytes())
621 )
621 )
622 }
622 }
623 }
623 }
624
624
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::path::Path;

    /// Glob patterns shared by the roots/dirs/parents tests.
    fn sample_glob_patterns() -> Vec<IgnorePattern> {
        vec![
            IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
            IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
        ]
    }

    #[test]
    fn test_roots_and_dirs() {
        let patterns = sample_glob_patterns();
        let (roots, dirs) = roots_and_dirs(&patterns);

        let expected_roots = vec![
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ];
        assert_eq!(roots, expected_roots);
        assert_eq!(dirs, vec![]);
    }

    #[test]
    fn test_roots_dirs_and_parents() {
        let patterns = sample_glob_patterns();

        let roots: HashSet<_> =
            vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
                .into_iter()
                .collect();
        let dirs = HashSet::new();
        let parents: HashSet<_> =
            vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
                .into_iter()
                .collect();

        let expected = RootsDirsAndParents {
            roots,
            dirs,
            parents,
        };
        assert_eq!(roots_dirs_and_parents(&patterns).unwrap(), expected);
    }

    #[test]
    fn test_filematcher_visit_children_set() {
        // Visitchildrenset
        let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
        let matcher = FileMatcher::new(&files).unwrap();

        // Each directory on the way to the file lists only the next
        // path component.
        let root_children: HashSet<_> =
            vec![HgPath::new(b"dir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(root_children)
        );

        let dir_children: HashSet<_> =
            vec![HgPath::new(b"subdir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(dir_children)
        );

        let subdir_children: HashSet<_> =
            vec![HgPath::new(b"foo.txt")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Set(subdir_children)
        );

        // Anything else has nothing to visit.
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[test]
    fn test_filematcher_visit_children_set_files_and_dirs() {
        let files = vec![
            HgPath::new(b"rootfile.txt"),
            HgPath::new(b"a/file1.txt"),
            HgPath::new(b"a/b/file2.txt"),
            // No file in a/b/c
            HgPath::new(b"a/b/c/d/file4.txt"),
        ];
        let matcher = FileMatcher::new(&files).unwrap();

        let root_children: HashSet<_> =
            vec![HgPath::new(b"a"), HgPath::new(b"rootfile.txt")]
                .into_iter()
                .collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(root_children)
        );

        let a_children: HashSet<_> =
            vec![HgPath::new(b"b"), HgPath::new(b"file1.txt")]
                .into_iter()
                .collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a")),
            VisitChildrenSet::Set(a_children)
        );

        let a_b_children: HashSet<_> =
            vec![HgPath::new(b"c"), HgPath::new(b"file2.txt")]
                .into_iter()
                .collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b")),
            VisitChildrenSet::Set(a_b_children)
        );

        let a_b_c_children: HashSet<_> =
            vec![HgPath::new(b"d")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c")),
            VisitChildrenSet::Set(a_b_c_children)
        );

        let a_b_c_d_children: HashSet<_> =
            vec![HgPath::new(b"file4.txt")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
            VisitChildrenSet::Set(a_b_c_d_children)
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
    }

    #[cfg(feature = "with-re2")]
    #[test]
    fn test_includematcher() {
        // VisitchildrensetPrefix
        let prefix_patterns = vec![IgnorePattern::new(
            PatternSyntax::RelPath,
            b"dir/subdir",
            Path::new(""),
        )];
        let (matcher, _) = IncludeMatcher::new(prefix_patterns, "").unwrap();

        let root_children: HashSet<_> =
            vec![HgPath::new(b"dir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(root_children)
        );

        let dir_children: HashSet<_> =
            vec![HgPath::new(b"subdir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(dir_children)
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::Recursive
        );
        // OPT: This should probably be 'all' if its parent is?
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetRootfilesin
        let rootfiles_patterns = vec![IgnorePattern::new(
            PatternSyntax::RootFiles,
            b"dir/subdir",
            Path::new(""),
        )];
        let (matcher, _) =
            IncludeMatcher::new(rootfiles_patterns, "").unwrap();

        let root_children: HashSet<_> =
            vec![HgPath::new(b"dir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(root_children)
        );

        let dir_children: HashSet<_> =
            vec![HgPath::new(b"subdir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::Set(dir_children)
        );

        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );

        // VisitchildrensetGlob
        let glob_patterns = vec![IgnorePattern::new(
            PatternSyntax::Glob,
            b"dir/z*",
            Path::new(""),
        )];
        let (matcher, _) = IncludeMatcher::new(glob_patterns, "").unwrap();

        let root_children: HashSet<_> =
            vec![HgPath::new(b"dir")].into_iter().collect();
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"")),
            VisitChildrenSet::Set(root_children)
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"folder")),
            VisitChildrenSet::Empty
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir")),
            VisitChildrenSet::This
        );
        // OPT: these should probably be set().
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir")),
            VisitChildrenSet::This
        );
        assert_eq!(
            matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
            VisitChildrenSet::This
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now