##// END OF EJS Templates
rust-regex: add test for verbatim regex syntax...
Raphaël Gomès -
r45344:2dd60a9f default draft
parent child Browse files
Show More
@@ -1,660 +1,669 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{
11 utils::{
11 utils::{
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
12 files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
13 hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
14 SliceExt,
14 SliceExt,
15 },
15 },
16 FastHashMap, PatternError,
16 FastHashMap, PatternError,
17 };
17 };
18 use lazy_static::lazy_static;
18 use lazy_static::lazy_static;
19 use regex::bytes::{NoExpand, Regex};
19 use regex::bytes::{NoExpand, Regex};
20 use std::fs::File;
20 use std::fs::File;
21 use std::io::Read;
21 use std::io::Read;
22 use std::ops::Deref;
22 use std::ops::Deref;
23 use std::path::{Path, PathBuf};
23 use std::path::{Path, PathBuf};
24 use std::vec::Vec;
24 use std::vec::Vec;
25
25
26 lazy_static! {
26 lazy_static! {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
27 static ref RE_ESCAPE: Vec<Vec<u8>> = {
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
28 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
29 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
30 for byte in to_escape {
30 for byte in to_escape {
31 v[*byte as usize].insert(0, b'\\');
31 v[*byte as usize].insert(0, b'\\');
32 }
32 }
33 v
33 v
34 };
34 };
35 }
35 }
36
36
/// `(source, replacement)` pairs tried on the input that follows a `*` in a
/// glob. These are matched in order and the first matching source wins, so
/// the more specific `*/` and `*` entries must stay before the empty
/// catch-all entry.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
    &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];

/// Appended to the regexp of globs
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
43
43
/// The kind of a pattern, as selected by its `kind:` prefix or by the
/// enclosing `syntax:` line of a pattern file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// A path relative to repository root, which is matched recursively
    Path,
    /// A path relative to cwd
    RelPath,
    /// An unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFiles,
    /// A file of patterns to read and include
    Include,
    /// A file of patterns to match against files under the same directory
    SubInclude,
}
69
69
70 /// Transforms a glob pattern into a regex
70 /// Transforms a glob pattern into a regex
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
71 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
72 let mut input = pat;
72 let mut input = pat;
73 let mut res: Vec<u8> = vec![];
73 let mut res: Vec<u8> = vec![];
74 let mut group_depth = 0;
74 let mut group_depth = 0;
75
75
76 while let Some((c, rest)) = input.split_first() {
76 while let Some((c, rest)) = input.split_first() {
77 input = rest;
77 input = rest;
78
78
79 match c {
79 match c {
80 b'*' => {
80 b'*' => {
81 for (source, repl) in GLOB_REPLACEMENTS {
81 for (source, repl) in GLOB_REPLACEMENTS {
82 if let Some(rest) = input.drop_prefix(source) {
82 if let Some(rest) = input.drop_prefix(source) {
83 input = rest;
83 input = rest;
84 res.extend(*repl);
84 res.extend(*repl);
85 break;
85 break;
86 }
86 }
87 }
87 }
88 }
88 }
89 b'?' => res.extend(b"."),
89 b'?' => res.extend(b"."),
90 b'[' => {
90 b'[' => {
91 match input.iter().skip(1).position(|b| *b == b']') {
91 match input.iter().skip(1).position(|b| *b == b']') {
92 None => res.extend(b"\\["),
92 None => res.extend(b"\\["),
93 Some(end) => {
93 Some(end) => {
94 // Account for the one we skipped
94 // Account for the one we skipped
95 let end = end + 1;
95 let end = end + 1;
96
96
97 res.extend(b"[");
97 res.extend(b"[");
98
98
99 for (i, b) in input[..end].iter().enumerate() {
99 for (i, b) in input[..end].iter().enumerate() {
100 if *b == b'!' && i == 0 {
100 if *b == b'!' && i == 0 {
101 res.extend(b"^")
101 res.extend(b"^")
102 } else if *b == b'^' && i == 0 {
102 } else if *b == b'^' && i == 0 {
103 res.extend(b"\\^")
103 res.extend(b"\\^")
104 } else if *b == b'\\' {
104 } else if *b == b'\\' {
105 res.extend(b"\\\\")
105 res.extend(b"\\\\")
106 } else {
106 } else {
107 res.push(*b)
107 res.push(*b)
108 }
108 }
109 }
109 }
110 res.extend(b"]");
110 res.extend(b"]");
111 input = &input[end + 1..];
111 input = &input[end + 1..];
112 }
112 }
113 }
113 }
114 }
114 }
115 b'{' => {
115 b'{' => {
116 group_depth += 1;
116 group_depth += 1;
117 res.extend(b"(?:")
117 res.extend(b"(?:")
118 }
118 }
119 b'}' if group_depth > 0 => {
119 b'}' if group_depth > 0 => {
120 group_depth -= 1;
120 group_depth -= 1;
121 res.extend(b")");
121 res.extend(b")");
122 }
122 }
123 b',' if group_depth > 0 => res.extend(b"|"),
123 b',' if group_depth > 0 => res.extend(b"|"),
124 b'\\' => {
124 b'\\' => {
125 let c = {
125 let c = {
126 if let Some((c, rest)) = input.split_first() {
126 if let Some((c, rest)) = input.split_first() {
127 input = rest;
127 input = rest;
128 c
128 c
129 } else {
129 } else {
130 c
130 c
131 }
131 }
132 };
132 };
133 res.extend(&RE_ESCAPE[*c as usize])
133 res.extend(&RE_ESCAPE[*c as usize])
134 }
134 }
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
135 _ => res.extend(&RE_ESCAPE[*c as usize]),
136 }
136 }
137 }
137 }
138 res
138 res
139 }
139 }
140
140
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
141 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
142 pattern
142 pattern
143 .iter()
143 .iter()
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
144 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
145 .collect()
145 .collect()
146 }
146 }
147
147
148 pub fn parse_pattern_syntax(
148 pub fn parse_pattern_syntax(
149 kind: &[u8],
149 kind: &[u8],
150 ) -> Result<PatternSyntax, PatternError> {
150 ) -> Result<PatternSyntax, PatternError> {
151 match kind {
151 match kind {
152 b"re:" => Ok(PatternSyntax::Regexp),
152 b"re:" => Ok(PatternSyntax::Regexp),
153 b"path:" => Ok(PatternSyntax::Path),
153 b"path:" => Ok(PatternSyntax::Path),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
154 b"relpath:" => Ok(PatternSyntax::RelPath),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
155 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
156 b"relglob:" => Ok(PatternSyntax::RelGlob),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
157 b"relre:" => Ok(PatternSyntax::RelRegexp),
158 b"glob:" => Ok(PatternSyntax::Glob),
158 b"glob:" => Ok(PatternSyntax::Glob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
159 b"rootglob:" => Ok(PatternSyntax::RootGlob),
160 b"include:" => Ok(PatternSyntax::Include),
160 b"include:" => Ok(PatternSyntax::Include),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
161 b"subinclude:" => Ok(PatternSyntax::SubInclude),
162 _ => Err(PatternError::UnsupportedSyntax(
162 _ => Err(PatternError::UnsupportedSyntax(
163 String::from_utf8_lossy(kind).to_string(),
163 String::from_utf8_lossy(kind).to_string(),
164 )),
164 )),
165 }
165 }
166 }
166 }
167
167
168 /// Builds the regex that corresponds to the given pattern.
168 /// Builds the regex that corresponds to the given pattern.
169 /// If within a `syntax: regexp` context, returns the pattern,
169 /// If within a `syntax: regexp` context, returns the pattern,
170 /// otherwise, returns the corresponding regex.
170 /// otherwise, returns the corresponding regex.
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
171 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
172 let IgnorePattern {
172 let IgnorePattern {
173 syntax, pattern, ..
173 syntax, pattern, ..
174 } = entry;
174 } = entry;
175 if pattern.is_empty() {
175 if pattern.is_empty() {
176 return vec![];
176 return vec![];
177 }
177 }
178 match syntax {
178 match syntax {
179 PatternSyntax::Regexp => pattern.to_owned(),
179 PatternSyntax::Regexp => pattern.to_owned(),
180 PatternSyntax::RelRegexp => {
180 PatternSyntax::RelRegexp => {
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
181 // The `regex` crate accepts `**` while `re2` and Python's `re`
182 // do not. Checking for `*` correctly triggers the same error all
182 // do not. Checking for `*` correctly triggers the same error all
183 // engines.
183 // engines.
184 if pattern[0] == b'^'
184 if pattern[0] == b'^'
185 || pattern[0] == b'*'
185 || pattern[0] == b'*'
186 || pattern.starts_with(b".*")
186 || pattern.starts_with(b".*")
187 {
187 {
188 return pattern.to_owned();
188 return pattern.to_owned();
189 }
189 }
190 [&b".*"[..], pattern].concat()
190 [&b".*"[..], pattern].concat()
191 }
191 }
192 PatternSyntax::Path | PatternSyntax::RelPath => {
192 PatternSyntax::Path | PatternSyntax::RelPath => {
193 if pattern == b"." {
193 if pattern == b"." {
194 return vec![];
194 return vec![];
195 }
195 }
196 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
196 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
197 }
197 }
198 PatternSyntax::RootFiles => {
198 PatternSyntax::RootFiles => {
199 let mut res = if pattern == b"." {
199 let mut res = if pattern == b"." {
200 vec![]
200 vec![]
201 } else {
201 } else {
202 // Pattern is a directory name.
202 // Pattern is a directory name.
203 [escape_pattern(pattern).as_slice(), b"/"].concat()
203 [escape_pattern(pattern).as_slice(), b"/"].concat()
204 };
204 };
205
205
206 // Anything after the pattern must be a non-directory.
206 // Anything after the pattern must be a non-directory.
207 res.extend(b"[^/]+$");
207 res.extend(b"[^/]+$");
208 res
208 res
209 }
209 }
210 PatternSyntax::RelGlob => {
210 PatternSyntax::RelGlob => {
211 let glob_re = glob_to_re(pattern);
211 let glob_re = glob_to_re(pattern);
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
212 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
213 [b".*", rest, GLOB_SUFFIX].concat()
213 [b".*", rest, GLOB_SUFFIX].concat()
214 } else {
214 } else {
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
215 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
216 }
216 }
217 }
217 }
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
218 PatternSyntax::Glob | PatternSyntax::RootGlob => {
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
219 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
220 }
220 }
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
221 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
222 }
222 }
223 }
223 }
224
224
/// Bytes that have a special meaning in a glob. `build_single_regex` treats a
/// `RootGlob` pattern containing none of them as an exact path that needs no
/// regex.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
    [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
227
227
/// Normalizes a `/`-separated path given as bytes, without touching the
/// filesystem.
///
/// - An empty input, or one that normalizes to nothing, yields `.`.
/// - Empty components (repeated or trailing slashes) and `.` components are
///   dropped.
/// - A `..` component removes the component before it; leading `..`
///   components of a relative path are kept, and `..` directly under the
///   root is dropped.
/// - One or two leading slashes are preserved; three or more collapse to
///   one.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    if bytes.is_empty() {
        return b".".to_vec();
    }
    let sep = b'/';
    let dot: &[u8] = b".";
    let dot_dot: &[u8] = b"..";

    let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
    if initial_slashes > 2 {
        // POSIX allows one or two initial slashes, but treats three or more
        // as single slash.
        initial_slashes = 1;
    }

    let mut components: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == sep) {
        if component.is_empty() || component == dot {
            continue;
        }
        // `..` is kept only when there is nothing it could cancel: at the
        // start of a relative path, or right after another kept `..`.
        let keep = component != dot_dot
            || (initial_slashes == 0 && components.is_empty())
            || components.last() == Some(&dot_dot);
        if keep {
            components.push(component);
        } else {
            // Cancels the previous component; a no-op directly under the
            // root, where there is nothing to pop.
            components.pop();
        }
    }

    let mut new_bytes = vec![sep; initial_slashes];
    new_bytes.extend(components.join(&sep));

    if new_bytes.is_empty() {
        b".".to_vec()
    } else {
        new_bytes
    }
}
269
269
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
270 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
271 /// that don't need to be transformed into a regex.
271 /// that don't need to be transformed into a regex.
272 pub fn build_single_regex(
272 pub fn build_single_regex(
273 entry: &IgnorePattern,
273 entry: &IgnorePattern,
274 ) -> Result<Option<Vec<u8>>, PatternError> {
274 ) -> Result<Option<Vec<u8>>, PatternError> {
275 let IgnorePattern {
275 let IgnorePattern {
276 pattern, syntax, ..
276 pattern, syntax, ..
277 } = entry;
277 } = entry;
278 let pattern = match syntax {
278 let pattern = match syntax {
279 PatternSyntax::RootGlob
279 PatternSyntax::RootGlob
280 | PatternSyntax::Path
280 | PatternSyntax::Path
281 | PatternSyntax::RelGlob
281 | PatternSyntax::RelGlob
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
282 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
283 PatternSyntax::Include | PatternSyntax::SubInclude => {
284 return Err(PatternError::NonRegexPattern(entry.clone()))
284 return Err(PatternError::NonRegexPattern(entry.clone()))
285 }
285 }
286 _ => pattern.to_owned(),
286 _ => pattern.to_owned(),
287 };
287 };
288 if *syntax == PatternSyntax::RootGlob
288 if *syntax == PatternSyntax::RootGlob
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
289 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
290 {
290 {
291 Ok(None)
291 Ok(None)
292 } else {
292 } else {
293 let mut entry = entry.clone();
293 let mut entry = entry.clone();
294 entry.pattern = pattern;
294 entry.pattern = pattern;
295 Ok(Some(_build_single_regex(&entry)))
295 Ok(Some(_build_single_regex(&entry)))
296 }
296 }
297 }
297 }
298
298
299 lazy_static! {
299 lazy_static! {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
300 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
301 let mut m = FastHashMap::default();
301 let mut m = FastHashMap::default();
302
302
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
303 m.insert(b"re".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
304 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
305 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
306 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
307 m.insert(b"include".as_ref(), b"include:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
308 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
309 m
309 m
310 };
310 };
311 }
311 }
312
312
/// Non-fatal problems found while reading a pattern file. They are returned
/// next to the parsed patterns instead of aborting the parse.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// An unknown name was given on a `syntax:` line:
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file does not exist: file path
    NoSuchFile(PathBuf),
}
320
320
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
321 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
322 lines: &[u8],
322 lines: &[u8],
323 file_path: P,
323 file_path: P,
324 warn: bool,
324 warn: bool,
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
325 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
326 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
327 let comment_escape_regex = Regex::new(r"\\#").unwrap();
328 let mut inputs: Vec<IgnorePattern> = vec![];
328 let mut inputs: Vec<IgnorePattern> = vec![];
329 let mut warnings: Vec<PatternFileWarning> = vec![];
329 let mut warnings: Vec<PatternFileWarning> = vec![];
330
330
331 let mut current_syntax = b"relre:".as_ref();
331 let mut current_syntax = b"relre:".as_ref();
332
332
333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
333 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
334 let line_number = line_number + 1;
334 let line_number = line_number + 1;
335
335
336 let line_buf;
336 let line_buf;
337 if line.contains(&b'#') {
337 if line.contains(&b'#') {
338 if let Some(cap) = comment_regex.captures(line) {
338 if let Some(cap) = comment_regex.captures(line) {
339 line = &line[..cap.get(1).unwrap().end()]
339 line = &line[..cap.get(1).unwrap().end()]
340 }
340 }
341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
341 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
342 line = &line_buf;
342 line = &line_buf;
343 }
343 }
344
344
345 let mut line = line.trim_end();
345 let mut line = line.trim_end();
346
346
347 if line.is_empty() {
347 if line.is_empty() {
348 continue;
348 continue;
349 }
349 }
350
350
351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
351 if let Some(syntax) = line.drop_prefix(b"syntax:") {
352 let syntax = syntax.trim();
352 let syntax = syntax.trim();
353
353
354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
354 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
355 current_syntax = rel_syntax;
355 current_syntax = rel_syntax;
356 } else if warn {
356 } else if warn {
357 warnings.push(PatternFileWarning::InvalidSyntax(
357 warnings.push(PatternFileWarning::InvalidSyntax(
358 file_path.as_ref().to_owned(),
358 file_path.as_ref().to_owned(),
359 syntax.to_owned(),
359 syntax.to_owned(),
360 ));
360 ));
361 }
361 }
362 continue;
362 continue;
363 }
363 }
364
364
365 let mut line_syntax: &[u8] = &current_syntax;
365 let mut line_syntax: &[u8] = &current_syntax;
366
366
367 for (s, rels) in SYNTAXES.iter() {
367 for (s, rels) in SYNTAXES.iter() {
368 if let Some(rest) = line.drop_prefix(rels) {
368 if let Some(rest) = line.drop_prefix(rels) {
369 line_syntax = rels;
369 line_syntax = rels;
370 line = rest;
370 line = rest;
371 break;
371 break;
372 }
372 }
373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
373 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
374 line_syntax = rels;
374 line_syntax = rels;
375 line = rest;
375 line = rest;
376 break;
376 break;
377 }
377 }
378 }
378 }
379
379
380 inputs.push(IgnorePattern::new(
380 inputs.push(IgnorePattern::new(
381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
381 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
382 PatternError::UnsupportedSyntax(syntax) => {
382 PatternError::UnsupportedSyntax(syntax) => {
383 PatternError::UnsupportedSyntaxInFile(
383 PatternError::UnsupportedSyntaxInFile(
384 syntax,
384 syntax,
385 file_path.as_ref().to_string_lossy().into(),
385 file_path.as_ref().to_string_lossy().into(),
386 line_number,
386 line_number,
387 )
387 )
388 }
388 }
389 _ => e,
389 _ => e,
390 })?,
390 })?,
391 &line,
391 &line,
392 &file_path,
392 &file_path,
393 ));
393 ));
394 }
394 }
395 Ok((inputs, warnings))
395 Ok((inputs, warnings))
396 }
396 }
397
397
398 pub fn read_pattern_file<P: AsRef<Path>>(
398 pub fn read_pattern_file<P: AsRef<Path>>(
399 file_path: P,
399 file_path: P,
400 warn: bool,
400 warn: bool,
401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
401 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
402 let mut f = match File::open(file_path.as_ref()) {
402 let mut f = match File::open(file_path.as_ref()) {
403 Ok(f) => Ok(f),
403 Ok(f) => Ok(f),
404 Err(e) => match e.kind() {
404 Err(e) => match e.kind() {
405 std::io::ErrorKind::NotFound => {
405 std::io::ErrorKind::NotFound => {
406 return Ok((
406 return Ok((
407 vec![],
407 vec![],
408 vec![PatternFileWarning::NoSuchFile(
408 vec![PatternFileWarning::NoSuchFile(
409 file_path.as_ref().to_owned(),
409 file_path.as_ref().to_owned(),
410 )],
410 )],
411 ))
411 ))
412 }
412 }
413 _ => Err(e),
413 _ => Err(e),
414 },
414 },
415 }?;
415 }?;
416 let mut contents = Vec::new();
416 let mut contents = Vec::new();
417
417
418 f.read_to_end(&mut contents)?;
418 f.read_to_end(&mut contents)?;
419
419
420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
420 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
421 }
421 }
422
422
423 /// Represents an entry in an "ignore" file.
423 /// Represents an entry in an "ignore" file.
424 #[derive(Debug, Eq, PartialEq, Clone)]
424 #[derive(Debug, Eq, PartialEq, Clone)]
425 pub struct IgnorePattern {
425 pub struct IgnorePattern {
426 pub syntax: PatternSyntax,
426 pub syntax: PatternSyntax,
427 pub pattern: Vec<u8>,
427 pub pattern: Vec<u8>,
428 pub source: PathBuf,
428 pub source: PathBuf,
429 }
429 }
430
430
431 impl IgnorePattern {
431 impl IgnorePattern {
432 pub fn new(
432 pub fn new(
433 syntax: PatternSyntax,
433 syntax: PatternSyntax,
434 pattern: &[u8],
434 pattern: &[u8],
435 source: impl AsRef<Path>,
435 source: impl AsRef<Path>,
436 ) -> Self {
436 ) -> Self {
437 Self {
437 Self {
438 syntax,
438 syntax,
439 pattern: pattern.to_owned(),
439 pattern: pattern.to_owned(),
440 source: source.as_ref().to_owned(),
440 source: source.as_ref().to_owned(),
441 }
441 }
442 }
442 }
443 }
443 }
444
444
445 pub type PatternResult<T> = Result<T, PatternError>;
445 pub type PatternResult<T> = Result<T, PatternError>;
446
446
447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
447 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
448 /// patterns.
448 /// patterns.
449 ///
449 ///
450 /// `subinclude:` is not treated as a special pattern here: unraveling them
450 /// `subinclude:` is not treated as a special pattern here: unraveling them
451 /// needs to occur in the "ignore" phase.
451 /// needs to occur in the "ignore" phase.
452 pub fn get_patterns_from_file(
452 pub fn get_patterns_from_file(
453 pattern_file: impl AsRef<Path>,
453 pattern_file: impl AsRef<Path>,
454 root_dir: impl AsRef<Path>,
454 root_dir: impl AsRef<Path>,
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
455 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
456 let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
457 let patterns = patterns
457 let patterns = patterns
458 .into_iter()
458 .into_iter()
459 .flat_map(|entry| -> PatternResult<_> {
459 .flat_map(|entry| -> PatternResult<_> {
460 let IgnorePattern {
460 let IgnorePattern {
461 syntax,
461 syntax,
462 pattern,
462 pattern,
463 source: _,
463 source: _,
464 } = &entry;
464 } = &entry;
465 Ok(match syntax {
465 Ok(match syntax {
466 PatternSyntax::Include => {
466 PatternSyntax::Include => {
467 let inner_include =
467 let inner_include =
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
468 root_dir.as_ref().join(get_path_from_bytes(&pattern));
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
469 let (inner_pats, inner_warnings) = get_patterns_from_file(
470 &inner_include,
470 &inner_include,
471 root_dir.as_ref(),
471 root_dir.as_ref(),
472 )?;
472 )?;
473 warnings.extend(inner_warnings);
473 warnings.extend(inner_warnings);
474 inner_pats
474 inner_pats
475 }
475 }
476 _ => vec![entry],
476 _ => vec![entry],
477 })
477 })
478 })
478 })
479 .flatten()
479 .flatten()
480 .collect();
480 .collect();
481
481
482 Ok((patterns, warnings))
482 Ok((patterns, warnings))
483 }
483 }
484
484
485 /// Holds all the information needed to handle a `subinclude:` pattern.
485 /// Holds all the information needed to handle a `subinclude:` pattern.
486 pub struct SubInclude {
486 pub struct SubInclude {
487 /// Will be used for repository (hg) paths that start with this prefix.
487 /// Will be used for repository (hg) paths that start with this prefix.
488 /// It is relative to the current working directory, so comparing against
488 /// It is relative to the current working directory, so comparing against
489 /// repository paths is painless.
489 /// repository paths is painless.
490 pub prefix: HgPathBuf,
490 pub prefix: HgPathBuf,
491 /// The file itself, containing the patterns
491 /// The file itself, containing the patterns
492 pub path: PathBuf,
492 pub path: PathBuf,
493 /// Folder in the filesystem where this it applies
493 /// Folder in the filesystem where this it applies
494 pub root: PathBuf,
494 pub root: PathBuf,
495 }
495 }
496
496
497 impl SubInclude {
497 impl SubInclude {
498 pub fn new(
498 pub fn new(
499 root_dir: impl AsRef<Path>,
499 root_dir: impl AsRef<Path>,
500 pattern: &[u8],
500 pattern: &[u8],
501 source: impl AsRef<Path>,
501 source: impl AsRef<Path>,
502 ) -> Result<SubInclude, HgPathError> {
502 ) -> Result<SubInclude, HgPathError> {
503 let normalized_source =
503 let normalized_source =
504 normalize_path_bytes(&get_bytes_from_path(source));
504 normalize_path_bytes(&get_bytes_from_path(source));
505
505
506 let source_root = get_path_from_bytes(&normalized_source);
506 let source_root = get_path_from_bytes(&normalized_source);
507 let source_root = source_root.parent().unwrap_or(source_root.deref());
507 let source_root = source_root.parent().unwrap_or(source_root.deref());
508
508
509 let path = source_root.join(get_path_from_bytes(pattern));
509 let path = source_root.join(get_path_from_bytes(pattern));
510 let new_root = path.parent().unwrap_or(path.deref());
510 let new_root = path.parent().unwrap_or(path.deref());
511
511
512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
512 let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
513
513
514 Ok(Self {
514 Ok(Self {
515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
515 prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
516 if !p.is_empty() {
516 if !p.is_empty() {
517 p.push(b'/');
517 p.push(b'/');
518 }
518 }
519 Ok(p)
519 Ok(p)
520 })?,
520 })?,
521 path: path.to_owned(),
521 path: path.to_owned(),
522 root: new_root.to_owned(),
522 root: new_root.to_owned(),
523 })
523 })
524 }
524 }
525 }
525 }
526
526
527 /// Separate and pre-process subincludes from other patterns for the "ignore"
527 /// Separate and pre-process subincludes from other patterns for the "ignore"
528 /// phase.
528 /// phase.
529 pub fn filter_subincludes(
529 pub fn filter_subincludes(
530 ignore_patterns: &[IgnorePattern],
530 ignore_patterns: &[IgnorePattern],
531 root_dir: impl AsRef<Path>,
531 root_dir: impl AsRef<Path>,
532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
532 ) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
533 let mut subincludes = vec![];
533 let mut subincludes = vec![];
534 let mut others = vec![];
534 let mut others = vec![];
535
535
536 for ignore_pattern in ignore_patterns.iter() {
536 for ignore_pattern in ignore_patterns.iter() {
537 let IgnorePattern {
537 let IgnorePattern {
538 syntax,
538 syntax,
539 pattern,
539 pattern,
540 source,
540 source,
541 } = ignore_pattern;
541 } = ignore_pattern;
542 if *syntax == PatternSyntax::SubInclude {
542 if *syntax == PatternSyntax::SubInclude {
543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
543 subincludes.push(SubInclude::new(&root_dir, pattern, &source)?);
544 } else {
544 } else {
545 others.push(ignore_pattern)
545 others.push(ignore_pattern)
546 }
546 }
547 }
547 }
548 Ok((subincludes, others))
548 Ok((subincludes, others))
549 }
549 }
550
550
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Bytes outside the escape list pass through; listed bytes get a
    /// backslash.
    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    /// One case per glob construct handled by `glob_to_re`.
    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax:` lines alone produce no pattern.
        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![]
        );
        // A per-line `glob:` prefix maps to the relative glob syntax.
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .unwrap()
                .0,
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"rust/target/",
                Path::new("")
            ))
            .unwrap(),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        // Verbatim regex syntax: the pattern comes back untouched.
        // `build_single_regex` returns an `Option`, hence the `Some`.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::Regexp,
                br"rust/target/\d+",
                Path::new("")
            ))
            .unwrap(),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"whatever",
                Path::new("")
            ))
            .unwrap(),
            None,
        );
        // A root glob with a special character gets a regex. No `^` is
        // expected: `_build_single_regex` emits only the glob translation
        // followed by the glob suffix.
        assert_eq!(
            build_single_regex(&IgnorePattern::new(
                PatternSyntax::RootGlob,
                b"*.o",
                Path::new("")
            ))
            .unwrap(),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now