##// END OF EJS Templates
rust-filepatterns: allow overriding default syntax...
Raphaël Gomès -
r50377:5fbdd888 default
parent child Browse files
Show More
@@ -1,687 +1,704 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::ops::Deref;
20 use std::ops::Deref;
21 use std::path::{Path, PathBuf};
21 use std::path::{Path, PathBuf};
22 use std::vec::Vec;
22 use std::vec::Vec;
23
23
24 lazy_static! {
24 lazy_static! {
25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
25 static ref RE_ESCAPE: Vec<Vec<u8>> = {
26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
26 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
27 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
27 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
28 for byte in to_escape {
28 for byte in to_escape {
29 v[*byte as usize].insert(0, b'\\');
29 v[*byte as usize].insert(0, b'\\');
30 }
30 }
31 v
31 v
32 };
32 };
33 }
33 }
34
34
35 /// These are matched in order
35 /// These are matched in order
36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
36 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
37 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
38
38
39 /// Appended to the regexp of globs
39 /// Appended to the regexp of globs
40 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
40 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
41
41
42 #[derive(Debug, Clone, PartialEq, Eq)]
42 #[derive(Debug, Clone, PartialEq, Eq)]
43 pub enum PatternSyntax {
43 pub enum PatternSyntax {
44 /// A regular expression
44 /// A regular expression
45 Regexp,
45 Regexp,
46 /// Glob that matches at the front of the path
46 /// Glob that matches at the front of the path
47 RootGlob,
47 RootGlob,
48 /// Glob that matches at any suffix of the path (still anchored at
48 /// Glob that matches at any suffix of the path (still anchored at
49 /// slashes)
49 /// slashes)
50 Glob,
50 Glob,
51 /// a path relative to repository root, which is matched recursively
51 /// a path relative to repository root, which is matched recursively
52 Path,
52 Path,
53 /// A path relative to cwd
53 /// A path relative to cwd
54 RelPath,
54 RelPath,
55 /// an unrooted glob (*.rs matches Rust files in all dirs)
55 /// an unrooted glob (*.rs matches Rust files in all dirs)
56 RelGlob,
56 RelGlob,
57 /// A regexp that needn't match the start of a name
57 /// A regexp that needn't match the start of a name
58 RelRegexp,
58 RelRegexp,
59 /// A path relative to repository root, which is matched non-recursively
59 /// A path relative to repository root, which is matched non-recursively
60 /// (will not match subdirectories)
60 /// (will not match subdirectories)
61 RootFiles,
61 RootFiles,
62 /// A file of patterns to read and include
62 /// A file of patterns to read and include
63 Include,
63 Include,
64 /// A file of patterns to match against files under the same directory
64 /// A file of patterns to match against files under the same directory
65 SubInclude,
65 SubInclude,
66 /// SubInclude with the result of parsing the included file
66 /// SubInclude with the result of parsing the included file
67 ///
67 ///
68 /// Note: there is no ExpandedInclude because that expansion can be done
68 /// Note: there is no ExpandedInclude because that expansion can be done
69 /// in place by replacing the Include pattern by the included patterns.
69 /// in place by replacing the Include pattern by the included patterns.
70 /// SubInclude requires more handling.
70 /// SubInclude requires more handling.
71 ///
71 ///
72 /// Note: `Box` is used to minimize size impact on other enum variants
72 /// Note: `Box` is used to minimize size impact on other enum variants
73 ExpandedSubInclude(Box<SubInclude>),
73 ExpandedSubInclude(Box<SubInclude>),
74 }
74 }
75
75
76 /// Transforms a glob pattern into a regex
76 /// Transforms a glob pattern into a regex
77 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
77 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
78 let mut input = pat;
78 let mut input = pat;
79 let mut res: Vec<u8> = vec![];
79 let mut res: Vec<u8> = vec![];
80 let mut group_depth = 0;
80 let mut group_depth = 0;
81
81
82 while let Some((c, rest)) = input.split_first() {
82 while let Some((c, rest)) = input.split_first() {
83 input = rest;
83 input = rest;
84
84
85 match c {
85 match c {
86 b'*' => {
86 b'*' => {
87 for (source, repl) in GLOB_REPLACEMENTS {
87 for (source, repl) in GLOB_REPLACEMENTS {
88 if let Some(rest) = input.drop_prefix(source) {
88 if let Some(rest) = input.drop_prefix(source) {
89 input = rest;
89 input = rest;
90 res.extend(*repl);
90 res.extend(*repl);
91 break;
91 break;
92 }
92 }
93 }
93 }
94 }
94 }
95 b'?' => res.extend(b"."),
95 b'?' => res.extend(b"."),
96 b'[' => {
96 b'[' => {
97 match input.iter().skip(1).position(|b| *b == b']') {
97 match input.iter().skip(1).position(|b| *b == b']') {
98 None => res.extend(b"\\["),
98 None => res.extend(b"\\["),
99 Some(end) => {
99 Some(end) => {
100 // Account for the one we skipped
100 // Account for the one we skipped
101 let end = end + 1;
101 let end = end + 1;
102
102
103 res.extend(b"[");
103 res.extend(b"[");
104
104
105 for (i, b) in input[..end].iter().enumerate() {
105 for (i, b) in input[..end].iter().enumerate() {
106 if *b == b'!' && i == 0 {
106 if *b == b'!' && i == 0 {
107 res.extend(b"^")
107 res.extend(b"^")
108 } else if *b == b'^' && i == 0 {
108 } else if *b == b'^' && i == 0 {
109 res.extend(b"\\^")
109 res.extend(b"\\^")
110 } else if *b == b'\\' {
110 } else if *b == b'\\' {
111 res.extend(b"\\\\")
111 res.extend(b"\\\\")
112 } else {
112 } else {
113 res.push(*b)
113 res.push(*b)
114 }
114 }
115 }
115 }
116 res.extend(b"]");
116 res.extend(b"]");
117 input = &input[end + 1..];
117 input = &input[end + 1..];
118 }
118 }
119 }
119 }
120 }
120 }
121 b'{' => {
121 b'{' => {
122 group_depth += 1;
122 group_depth += 1;
123 res.extend(b"(?:")
123 res.extend(b"(?:")
124 }
124 }
125 b'}' if group_depth > 0 => {
125 b'}' if group_depth > 0 => {
126 group_depth -= 1;
126 group_depth -= 1;
127 res.extend(b")");
127 res.extend(b")");
128 }
128 }
129 b',' if group_depth > 0 => res.extend(b"|"),
129 b',' if group_depth > 0 => res.extend(b"|"),
130 b'\\' => {
130 b'\\' => {
131 let c = {
131 let c = {
132 if let Some((c, rest)) = input.split_first() {
132 if let Some((c, rest)) = input.split_first() {
133 input = rest;
133 input = rest;
134 c
134 c
135 } else {
135 } else {
136 c
136 c
137 }
137 }
138 };
138 };
139 res.extend(&RE_ESCAPE[*c as usize])
139 res.extend(&RE_ESCAPE[*c as usize])
140 }
140 }
141 _ => res.extend(&RE_ESCAPE[*c as usize]),
141 _ => res.extend(&RE_ESCAPE[*c as usize]),
142 }
142 }
143 }
143 }
144 res
144 res
145 }
145 }
146
146
147 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
147 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
148 pattern
148 pattern
149 .iter()
149 .iter()
150 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
150 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
151 .collect()
151 .collect()
152 }
152 }
153
153
154 pub fn parse_pattern_syntax(
154 pub fn parse_pattern_syntax(
155 kind: &[u8],
155 kind: &[u8],
156 ) -> Result<PatternSyntax, PatternError> {
156 ) -> Result<PatternSyntax, PatternError> {
157 match kind {
157 match kind {
158 b"re:" => Ok(PatternSyntax::Regexp),
158 b"re:" => Ok(PatternSyntax::Regexp),
159 b"path:" => Ok(PatternSyntax::Path),
159 b"path:" => Ok(PatternSyntax::Path),
160 b"relpath:" => Ok(PatternSyntax::RelPath),
160 b"relpath:" => Ok(PatternSyntax::RelPath),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
161 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
162 b"relglob:" => Ok(PatternSyntax::RelGlob),
162 b"relglob:" => Ok(PatternSyntax::RelGlob),
163 b"relre:" => Ok(PatternSyntax::RelRegexp),
163 b"relre:" => Ok(PatternSyntax::RelRegexp),
164 b"glob:" => Ok(PatternSyntax::Glob),
164 b"glob:" => Ok(PatternSyntax::Glob),
165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
165 b"rootglob:" => Ok(PatternSyntax::RootGlob),
166 b"include:" => Ok(PatternSyntax::Include),
166 b"include:" => Ok(PatternSyntax::Include),
167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
167 b"subinclude:" => Ok(PatternSyntax::SubInclude),
168 _ => Err(PatternError::UnsupportedSyntax(
168 _ => Err(PatternError::UnsupportedSyntax(
169 String::from_utf8_lossy(kind).to_string(),
169 String::from_utf8_lossy(kind).to_string(),
170 )),
170 )),
171 }
171 }
172 }
172 }
173
173
174 /// Builds the regex that corresponds to the given pattern.
174 /// Builds the regex that corresponds to the given pattern.
175 /// If within a `syntax: regexp` context, returns the pattern,
175 /// If within a `syntax: regexp` context, returns the pattern,
176 /// otherwise, returns the corresponding regex.
176 /// otherwise, returns the corresponding regex.
177 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
177 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
178 let IgnorePattern {
178 let IgnorePattern {
179 syntax, pattern, ..
179 syntax, pattern, ..
180 } = entry;
180 } = entry;
181 if pattern.is_empty() {
181 if pattern.is_empty() {
182 return vec![];
182 return vec![];
183 }
183 }
184 match syntax {
184 match syntax {
185 PatternSyntax::Regexp => pattern.to_owned(),
185 PatternSyntax::Regexp => pattern.to_owned(),
186 PatternSyntax::RelRegexp => {
186 PatternSyntax::RelRegexp => {
187 // The `regex` crate accepts `**` while `re2` and Python's `re`
187 // The `regex` crate accepts `**` while `re2` and Python's `re`
188 // do not. Checking for `*` correctly triggers the same error all
188 // do not. Checking for `*` correctly triggers the same error all
189 // engines.
189 // engines.
190 if pattern[0] == b'^'
190 if pattern[0] == b'^'
191 || pattern[0] == b'*'
191 || pattern[0] == b'*'
192 || pattern.starts_with(b".*")
192 || pattern.starts_with(b".*")
193 {
193 {
194 return pattern.to_owned();
194 return pattern.to_owned();
195 }
195 }
196 [&b".*"[..], pattern].concat()
196 [&b".*"[..], pattern].concat()
197 }
197 }
198 PatternSyntax::Path | PatternSyntax::RelPath => {
198 PatternSyntax::Path | PatternSyntax::RelPath => {
199 if pattern == b"." {
199 if pattern == b"." {
200 return vec![];
200 return vec![];
201 }
201 }
202 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
202 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
203 }
203 }
204 PatternSyntax::RootFiles => {
204 PatternSyntax::RootFiles => {
205 let mut res = if pattern == b"." {
205 let mut res = if pattern == b"." {
206 vec![]
206 vec![]
207 } else {
207 } else {
208 // Pattern is a directory name.
208 // Pattern is a directory name.
209 [escape_pattern(pattern).as_slice(), b"/"].concat()
209 [escape_pattern(pattern).as_slice(), b"/"].concat()
210 };
210 };
211
211
212 // Anything after the pattern must be a non-directory.
212 // Anything after the pattern must be a non-directory.
213 res.extend(b"[^/]+$");
213 res.extend(b"[^/]+$");
214 res
214 res
215 }
215 }
216 PatternSyntax::RelGlob => {
216 PatternSyntax::RelGlob => {
217 let glob_re = glob_to_re(pattern);
217 let glob_re = glob_to_re(pattern);
218 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
218 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
219 [b".*", rest, GLOB_SUFFIX].concat()
219 [b".*", rest, GLOB_SUFFIX].concat()
220 } else {
220 } else {
221 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
221 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
222 }
222 }
223 }
223 }
224 PatternSyntax::Glob | PatternSyntax::RootGlob => {
224 PatternSyntax::Glob | PatternSyntax::RootGlob => {
225 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
225 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
226 }
226 }
227 PatternSyntax::Include
227 PatternSyntax::Include
228 | PatternSyntax::SubInclude
228 | PatternSyntax::SubInclude
229 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
229 | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
230 }
230 }
231 }
231 }
232
232
233 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
233 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
234 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
234 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
235
235
236 /// TODO support other platforms
236 /// TODO support other platforms
237 #[cfg(unix)]
237 #[cfg(unix)]
238 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
238 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
239 if bytes.is_empty() {
239 if bytes.is_empty() {
240 return b".".to_vec();
240 return b".".to_vec();
241 }
241 }
242 let sep = b'/';
242 let sep = b'/';
243
243
244 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
244 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
245 if initial_slashes > 2 {
245 if initial_slashes > 2 {
246 // POSIX allows one or two initial slashes, but treats three or more
246 // POSIX allows one or two initial slashes, but treats three or more
247 // as single slash.
247 // as single slash.
248 initial_slashes = 1;
248 initial_slashes = 1;
249 }
249 }
250 let components = bytes
250 let components = bytes
251 .split(|b| *b == sep)
251 .split(|b| *b == sep)
252 .filter(|c| !(c.is_empty() || c == b"."))
252 .filter(|c| !(c.is_empty() || c == b"."))
253 .fold(vec![], |mut acc, component| {
253 .fold(vec![], |mut acc, component| {
254 if component != b".."
254 if component != b".."
255 || (initial_slashes == 0 && acc.is_empty())
255 || (initial_slashes == 0 && acc.is_empty())
256 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
256 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
257 {
257 {
258 acc.push(component)
258 acc.push(component)
259 } else if !acc.is_empty() {
259 } else if !acc.is_empty() {
260 acc.pop();
260 acc.pop();
261 }
261 }
262 acc
262 acc
263 });
263 });
264 let mut new_bytes = components.join(&sep);
264 let mut new_bytes = components.join(&sep);
265
265
266 if initial_slashes > 0 {
266 if initial_slashes > 0 {
267 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
267 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
268 buf.extend(new_bytes);
268 buf.extend(new_bytes);
269 new_bytes = buf;
269 new_bytes = buf;
270 }
270 }
271 if new_bytes.is_empty() {
271 if new_bytes.is_empty() {
272 b".".to_vec()
272 b".".to_vec()
273 } else {
273 } else {
274 new_bytes
274 new_bytes
275 }
275 }
276 }
276 }
277
277
278 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
278 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
279 /// that don't need to be transformed into a regex.
279 /// that don't need to be transformed into a regex.
280 pub fn build_single_regex(
280 pub fn build_single_regex(
281 entry: &IgnorePattern,
281 entry: &IgnorePattern,
282 ) -> Result<Option<Vec<u8>>, PatternError> {
282 ) -> Result<Option<Vec<u8>>, PatternError> {
283 let IgnorePattern {
283 let IgnorePattern {
284 pattern, syntax, ..
284 pattern, syntax, ..
285 } = entry;
285 } = entry;
286 let pattern = match syntax {
286 let pattern = match syntax {
287 PatternSyntax::RootGlob
287 PatternSyntax::RootGlob
288 | PatternSyntax::Path
288 | PatternSyntax::Path
289 | PatternSyntax::RelGlob
289 | PatternSyntax::RelGlob
290 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
290 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
291 PatternSyntax::Include | PatternSyntax::SubInclude => {
291 PatternSyntax::Include | PatternSyntax::SubInclude => {
292 return Err(PatternError::NonRegexPattern(entry.clone()))
292 return Err(PatternError::NonRegexPattern(entry.clone()))
293 }
293 }
294 _ => pattern.to_owned(),
294 _ => pattern.to_owned(),
295 };
295 };
296 if *syntax == PatternSyntax::RootGlob
296 if *syntax == PatternSyntax::RootGlob
297 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
297 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
298 {
298 {
299 Ok(None)
299 Ok(None)
300 } else {
300 } else {
301 let mut entry = entry.clone();
301 let mut entry = entry.clone();
302 entry.pattern = pattern;
302 entry.pattern = pattern;
303 Ok(Some(_build_single_regex(&entry)))
303 Ok(Some(_build_single_regex(&entry)))
304 }
304 }
305 }
305 }
306
306
307 lazy_static! {
307 lazy_static! {
308 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
308 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
309 let mut m = FastHashMap::default();
309 let mut m = FastHashMap::default();
310
310
311 m.insert(b"re".as_ref(), b"relre:".as_ref());
311 m.insert(b"re".as_ref(), b"relre:".as_ref());
312 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
312 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
313 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
313 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
314 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
314 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
315 m.insert(b"include".as_ref(), b"include:".as_ref());
315 m.insert(b"include".as_ref(), b"include:".as_ref());
316 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
316 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
317 m
317 m
318 };
318 };
319 }
319 }
320
320
321 #[derive(Debug)]
321 #[derive(Debug)]
322 pub enum PatternFileWarning {
322 pub enum PatternFileWarning {
323 /// (file path, syntax bytes)
323 /// (file path, syntax bytes)
324 InvalidSyntax(PathBuf, Vec<u8>),
324 InvalidSyntax(PathBuf, Vec<u8>),
325 /// File path
325 /// File path
326 NoSuchFile(PathBuf),
326 NoSuchFile(PathBuf),
327 }
327 }
328
328
329 pub fn parse_pattern_file_contents(
329 pub fn parse_pattern_file_contents(
330 lines: &[u8],
330 lines: &[u8],
331 file_path: &Path,
331 file_path: &Path,
332 default_syntax_override: Option<&[u8]>,
332 warn: bool,
333 warn: bool,
333 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
334 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
334 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
335 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
335
336
336 #[allow(clippy::trivial_regex)]
337 #[allow(clippy::trivial_regex)]
337 let comment_escape_regex = Regex::new(r"\\#").unwrap();
338 let comment_escape_regex = Regex::new(r"\\#").unwrap();
338 let mut inputs: Vec<IgnorePattern> = vec![];
339 let mut inputs: Vec<IgnorePattern> = vec![];
339 let mut warnings: Vec<PatternFileWarning> = vec![];
340 let mut warnings: Vec<PatternFileWarning> = vec![];
340
341
341 let mut current_syntax = b"relre:".as_ref();
342 let mut current_syntax =
343 default_syntax_override.unwrap_or(b"relre:".as_ref());
342
344
343 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
345 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
344 let line_number = line_number + 1;
346 let line_number = line_number + 1;
345
347
346 let line_buf;
348 let line_buf;
347 if line.contains(&b'#') {
349 if line.contains(&b'#') {
348 if let Some(cap) = comment_regex.captures(line) {
350 if let Some(cap) = comment_regex.captures(line) {
349 line = &line[..cap.get(1).unwrap().end()]
351 line = &line[..cap.get(1).unwrap().end()]
350 }
352 }
351 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
353 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
352 line = &line_buf;
354 line = &line_buf;
353 }
355 }
354
356
355 let mut line = line.trim_end();
357 let mut line = line.trim_end();
356
358
357 if line.is_empty() {
359 if line.is_empty() {
358 continue;
360 continue;
359 }
361 }
360
362
361 if let Some(syntax) = line.drop_prefix(b"syntax:") {
363 if let Some(syntax) = line.drop_prefix(b"syntax:") {
362 let syntax = syntax.trim();
364 let syntax = syntax.trim();
363
365
364 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
366 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
365 current_syntax = rel_syntax;
367 current_syntax = rel_syntax;
366 } else if warn {
368 } else if warn {
367 warnings.push(PatternFileWarning::InvalidSyntax(
369 warnings.push(PatternFileWarning::InvalidSyntax(
368 file_path.to_owned(),
370 file_path.to_owned(),
369 syntax.to_owned(),
371 syntax.to_owned(),
370 ));
372 ));
371 }
373 }
372 continue;
374 continue;
373 }
375 }
374
376
375 let mut line_syntax: &[u8] = &current_syntax;
377 let mut line_syntax: &[u8] = &current_syntax;
376
378
377 for (s, rels) in SYNTAXES.iter() {
379 for (s, rels) in SYNTAXES.iter() {
378 if let Some(rest) = line.drop_prefix(rels) {
380 if let Some(rest) = line.drop_prefix(rels) {
379 line_syntax = rels;
381 line_syntax = rels;
380 line = rest;
382 line = rest;
381 break;
383 break;
382 }
384 }
383 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
385 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
384 line_syntax = rels;
386 line_syntax = rels;
385 line = rest;
387 line = rest;
386 break;
388 break;
387 }
389 }
388 }
390 }
389
391
390 inputs.push(IgnorePattern::new(
392 inputs.push(IgnorePattern::new(
391 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
393 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
392 PatternError::UnsupportedSyntax(syntax) => {
394 PatternError::UnsupportedSyntax(syntax) => {
393 PatternError::UnsupportedSyntaxInFile(
395 PatternError::UnsupportedSyntaxInFile(
394 syntax,
396 syntax,
395 file_path.to_string_lossy().into(),
397 file_path.to_string_lossy().into(),
396 line_number,
398 line_number,
397 )
399 )
398 }
400 }
399 _ => e,
401 _ => e,
400 })?,
402 })?,
401 &line,
403 &line,
402 file_path,
404 file_path,
403 ));
405 ));
404 }
406 }
405 Ok((inputs, warnings))
407 Ok((inputs, warnings))
406 }
408 }
407
409
408 pub fn read_pattern_file(
410 pub fn read_pattern_file(
409 file_path: &Path,
411 file_path: &Path,
410 warn: bool,
412 warn: bool,
411 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
413 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
412 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
414 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
413 match std::fs::read(file_path) {
415 match std::fs::read(file_path) {
414 Ok(contents) => {
416 Ok(contents) => {
415 inspect_pattern_bytes(&contents);
417 inspect_pattern_bytes(&contents);
416 parse_pattern_file_contents(&contents, file_path, warn)
418 parse_pattern_file_contents(&contents, file_path, None, warn)
417 }
419 }
418 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
420 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
419 vec![],
421 vec![],
420 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
422 vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
421 )),
423 )),
422 Err(e) => Err(e.into()),
424 Err(e) => Err(e.into()),
423 }
425 }
424 }
426 }
425
427
426 /// Represents an entry in an "ignore" file.
428 /// Represents an entry in an "ignore" file.
427 #[derive(Debug, Eq, PartialEq, Clone)]
429 #[derive(Debug, Eq, PartialEq, Clone)]
428 pub struct IgnorePattern {
430 pub struct IgnorePattern {
429 pub syntax: PatternSyntax,
431 pub syntax: PatternSyntax,
430 pub pattern: Vec<u8>,
432 pub pattern: Vec<u8>,
431 pub source: PathBuf,
433 pub source: PathBuf,
432 }
434 }
433
435
434 impl IgnorePattern {
436 impl IgnorePattern {
435 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
437 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
436 Self {
438 Self {
437 syntax,
439 syntax,
438 pattern: pattern.to_owned(),
440 pattern: pattern.to_owned(),
439 source: source.to_owned(),
441 source: source.to_owned(),
440 }
442 }
441 }
443 }
442 }
444 }
443
445
444 pub type PatternResult<T> = Result<T, PatternError>;
446 pub type PatternResult<T> = Result<T, PatternError>;
445
447
446 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
448 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
447 /// and `subinclude:` patterns.
449 /// and `subinclude:` patterns.
448 ///
450 ///
449 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
451 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
450 /// is used for the latter to form a tree of patterns.
452 /// is used for the latter to form a tree of patterns.
451 pub fn get_patterns_from_file(
453 pub fn get_patterns_from_file(
452 pattern_file: &Path,
454 pattern_file: &Path,
453 root_dir: &Path,
455 root_dir: &Path,
454 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
456 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
457 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
456 let (patterns, mut warnings) =
458 let (patterns, mut warnings) =
457 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
459 read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
458 let patterns = patterns
460 let patterns = patterns
459 .into_iter()
461 .into_iter()
460 .flat_map(|entry| -> PatternResult<_> {
462 .flat_map(|entry| -> PatternResult<_> {
461 Ok(match &entry.syntax {
463 Ok(match &entry.syntax {
462 PatternSyntax::Include => {
464 PatternSyntax::Include => {
463 let inner_include =
465 let inner_include =
464 root_dir.join(get_path_from_bytes(&entry.pattern));
466 root_dir.join(get_path_from_bytes(&entry.pattern));
465 let (inner_pats, inner_warnings) = get_patterns_from_file(
467 let (inner_pats, inner_warnings) = get_patterns_from_file(
466 &inner_include,
468 &inner_include,
467 root_dir,
469 root_dir,
468 inspect_pattern_bytes,
470 inspect_pattern_bytes,
469 )?;
471 )?;
470 warnings.extend(inner_warnings);
472 warnings.extend(inner_warnings);
471 inner_pats
473 inner_pats
472 }
474 }
473 PatternSyntax::SubInclude => {
475 PatternSyntax::SubInclude => {
474 let mut sub_include = SubInclude::new(
476 let mut sub_include = SubInclude::new(
475 &root_dir,
477 &root_dir,
476 &entry.pattern,
478 &entry.pattern,
477 &entry.source,
479 &entry.source,
478 )?;
480 )?;
479 let (inner_patterns, inner_warnings) =
481 let (inner_patterns, inner_warnings) =
480 get_patterns_from_file(
482 get_patterns_from_file(
481 &sub_include.path,
483 &sub_include.path,
482 &sub_include.root,
484 &sub_include.root,
483 inspect_pattern_bytes,
485 inspect_pattern_bytes,
484 )?;
486 )?;
485 sub_include.included_patterns = inner_patterns;
487 sub_include.included_patterns = inner_patterns;
486 warnings.extend(inner_warnings);
488 warnings.extend(inner_warnings);
487 vec![IgnorePattern {
489 vec![IgnorePattern {
488 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
490 syntax: PatternSyntax::ExpandedSubInclude(Box::new(
489 sub_include,
491 sub_include,
490 )),
492 )),
491 ..entry
493 ..entry
492 }]
494 }]
493 }
495 }
494 _ => vec![entry],
496 _ => vec![entry],
495 })
497 })
496 })
498 })
497 .flatten()
499 .flatten()
498 .collect();
500 .collect();
499
501
500 Ok((patterns, warnings))
502 Ok((patterns, warnings))
501 }
503 }
502
504
503 /// Holds all the information needed to handle a `subinclude:` pattern.
505 /// Holds all the information needed to handle a `subinclude:` pattern.
504 #[derive(Debug, PartialEq, Eq, Clone)]
506 #[derive(Debug, PartialEq, Eq, Clone)]
505 pub struct SubInclude {
507 pub struct SubInclude {
506 /// Will be used for repository (hg) paths that start with this prefix.
508 /// Will be used for repository (hg) paths that start with this prefix.
507 /// It is relative to the current working directory, so comparing against
509 /// It is relative to the current working directory, so comparing against
508 /// repository paths is painless.
510 /// repository paths is painless.
509 pub prefix: HgPathBuf,
511 pub prefix: HgPathBuf,
510 /// The file itself, containing the patterns
512 /// The file itself, containing the patterns
511 pub path: PathBuf,
513 pub path: PathBuf,
512 /// Folder in the filesystem where this it applies
514 /// Folder in the filesystem where this it applies
513 pub root: PathBuf,
515 pub root: PathBuf,
514
516
515 pub included_patterns: Vec<IgnorePattern>,
517 pub included_patterns: Vec<IgnorePattern>,
516 }
518 }
517
519
518 impl SubInclude {
520 impl SubInclude {
519 pub fn new(
521 pub fn new(
520 root_dir: &Path,
522 root_dir: &Path,
521 pattern: &[u8],
523 pattern: &[u8],
522 source: &Path,
524 source: &Path,
523 ) -> Result<SubInclude, HgPathError> {
525 ) -> Result<SubInclude, HgPathError> {
524 let normalized_source =
526 let normalized_source =
525 normalize_path_bytes(&get_bytes_from_path(source));
527 normalize_path_bytes(&get_bytes_from_path(source));
526
528
527 let source_root = get_path_from_bytes(&normalized_source);
529 let source_root = get_path_from_bytes(&normalized_source);
528 let source_root =
530 let source_root =
529 source_root.parent().unwrap_or_else(|| source_root.deref());
531 source_root.parent().unwrap_or_else(|| source_root.deref());
530
532
531 let path = source_root.join(get_path_from_bytes(pattern));
533 let path = source_root.join(get_path_from_bytes(pattern));
532 let new_root = path.parent().unwrap_or_else(|| path.deref());
534 let new_root = path.parent().unwrap_or_else(|| path.deref());
533
535
534 let prefix = canonical_path(root_dir, root_dir, new_root)?;
536 let prefix = canonical_path(root_dir, root_dir, new_root)?;
535
537
536 Ok(Self {
538 Ok(Self {
537 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
539 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
538 if !p.is_empty() {
540 if !p.is_empty() {
539 p.push_byte(b'/');
541 p.push_byte(b'/');
540 }
542 }
541 Ok(p)
543 Ok(p)
542 })?,
544 })?,
543 path: path.to_owned(),
545 path: path.to_owned(),
544 root: new_root.to_owned(),
546 root: new_root.to_owned(),
545 included_patterns: Vec::new(),
547 included_patterns: Vec::new(),
546 })
548 })
547 }
549 }
548 }
550 }
549
551
550 /// Separate and pre-process subincludes from other patterns for the "ignore"
552 /// Separate and pre-process subincludes from other patterns for the "ignore"
551 /// phase.
553 /// phase.
552 pub fn filter_subincludes(
554 pub fn filter_subincludes(
553 ignore_patterns: Vec<IgnorePattern>,
555 ignore_patterns: Vec<IgnorePattern>,
554 ) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
556 ) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
555 let mut subincludes = vec![];
557 let mut subincludes = vec![];
556 let mut others = vec![];
558 let mut others = vec![];
557
559
558 for pattern in ignore_patterns {
560 for pattern in ignore_patterns {
559 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
561 if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
560 {
562 {
561 subincludes.push(sub_include);
563 subincludes.push(sub_include);
562 } else {
564 } else {
563 others.push(pattern)
565 others.push(pattern)
564 }
566 }
565 }
567 }
566 Ok((subincludes, others))
568 Ok((subincludes, others))
567 }
569 }
568
570
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Build the regex for a single pattern of the given `syntax`, using an
    /// empty source path, and unwrap the result.
    fn regex_for(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
        let ignore_pattern = IgnorePattern::new(syntax, pattern, Path::new(""));
        build_single_regex(&ignore_pattern).unwrap()
    }

    #[test]
    fn escape_pattern_test() {
        // Bytes that must come out of `escape_pattern` unchanged.
        let plain =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(plain), plain.to_vec());

        // All escape codes.
        // Both literals are raw byte strings, so `\t`, `\n`, ... below are a
        // literal backslash followed by a letter, not control characters.
        let specials = br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#;
        let escaped =
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#;
        assert_eq!(escape_pattern(specials), escaped.to_vec());
    }

    #[test]
    fn glob_test() {
        // (glob, expected regex) pairs, checked in order.
        let cases: [(&[u8], &[u8]); 8] = [
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];

        for &(glob, expected) in cases.iter() {
            assert_eq!(glob_to_re(glob), expected);
        }
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        // Parse `contents` as if read from "file_path", with no default
        // syntax override and warnings disabled; keep only the patterns.
        let parse = |contents: &[u8]| {
            parse_pattern_file_contents(
                contents,
                Path::new("file_path"),
                None,
                false,
            )
            .unwrap()
            .0
        };

        // A `syntax: glob` header applies to the lines that follow it.
        assert_eq!(
            parse(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // Syntax headers alone do not produce any pattern.
        assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);

        // An inline `glob:` prefix selects the syntax for that line.
        assert_eq!(
            parse(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        let from_rel_glob = regex_for(PatternSyntax::RelGlob, b"rust/target/");
        assert_eq!(
            from_rel_glob,
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );

        let from_regexp = regex_for(PatternSyntax::Regexp, br"rust/target/\d+");
        assert_eq!(from_regexp, Some(br"rust/target/\d+".to_vec()));
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // Root globs without glob metacharacters yield no regex at all.
        assert_eq!(regex_for(PatternSyntax::RootGlob, b""), None);
        assert_eq!(regex_for(PatternSyntax::RootGlob, b"whatever"), None);

        // A root glob that does contain a wildcard still produces a regex.
        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now