##// END OF EJS Templates
rust-filepatterns: improve API and robustness for pattern files parsing...
Raphaël Gomès -
r44784:d42eea9a default
parent child Browse files
Show More
@@ -1,380 +1,534 b''
1 // filepatterns.rs
1 // filepatterns.rs
2 //
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 //! Handling of Mercurial-specific patterns.
8 //! Handling of Mercurial-specific patterns.
9
9
10 use crate::{
10 use crate::{utils::SliceExt, FastHashMap, PatternError};
11 utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
12 };
13 use lazy_static::lazy_static;
11 use lazy_static::lazy_static;
14 use regex::bytes::{NoExpand, Regex};
12 use regex::bytes::{NoExpand, Regex};
15 use std::fs::File;
13 use std::fs::File;
16 use std::io::Read;
14 use std::io::Read;
17 use std::path::{Path, PathBuf};
15 use std::path::{Path, PathBuf};
18 use std::vec::Vec;
16 use std::vec::Vec;
19
17
20 lazy_static! {
18 lazy_static! {
21 static ref RE_ESCAPE: Vec<Vec<u8>> = {
19 static ref RE_ESCAPE: Vec<Vec<u8>> = {
22 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
20 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
23 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
21 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
24 for byte in to_escape {
22 for byte in to_escape {
25 v[*byte as usize].insert(0, b'\\');
23 v[*byte as usize].insert(0, b'\\');
26 }
24 }
27 v
25 v
28 };
26 };
29 }
27 }
30
28
31 /// These are matched in order
29 /// These are matched in order
32 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
30 const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
33 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
31 &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
34
32
33 /// Appended to the regexp of globs
34 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
35
35 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
36 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
36 pub enum PatternSyntax {
37 pub enum PatternSyntax {
38 /// A regular expression
37 Regexp,
39 Regexp,
38 /// Glob that matches at the front of the path
40 /// Glob that matches at the front of the path
39 RootGlob,
41 RootGlob,
40 /// Glob that matches at any suffix of the path (still anchored at
42 /// Glob that matches at any suffix of the path (still anchored at
41 /// slashes)
43 /// slashes)
42 Glob,
44 Glob,
45 /// A path relative to repository root, which is matched recursively
43 Path,
46 Path,
47 /// A path relative to cwd
44 RelPath,
48 RelPath,
49 /// An unrooted glob (`*.rs` matches Rust files in all directories)
45 RelGlob,
50 RelGlob,
51 /// A regexp that needn't match the start of a name
46 RelRegexp,
52 RelRegexp,
53 /// A path relative to repository root, which is matched non-recursively
54 /// (will not match subdirectories)
47 RootFiles,
55 RootFiles,
48 }
56 }
49
57
50 /// Transforms a glob pattern into a regex
58 /// Transforms a glob pattern into a regex
51 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
59 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
52 let mut input = pat;
60 let mut input = pat;
53 let mut res: Vec<u8> = vec![];
61 let mut res: Vec<u8> = vec![];
54 let mut group_depth = 0;
62 let mut group_depth = 0;
55
63
56 while let Some((c, rest)) = input.split_first() {
64 while let Some((c, rest)) = input.split_first() {
57 input = rest;
65 input = rest;
58
66
59 match c {
67 match c {
60 b'*' => {
68 b'*' => {
61 for (source, repl) in GLOB_REPLACEMENTS {
69 for (source, repl) in GLOB_REPLACEMENTS {
62 if let Some(rest) = input.drop_prefix(source) {
70 if let Some(rest) = input.drop_prefix(source) {
63 input = rest;
71 input = rest;
64 res.extend(*repl);
72 res.extend(*repl);
65 break;
73 break;
66 }
74 }
67 }
75 }
68 }
76 }
69 b'?' => res.extend(b"."),
77 b'?' => res.extend(b"."),
70 b'[' => {
78 b'[' => {
71 match input.iter().skip(1).position(|b| *b == b']') {
79 match input.iter().skip(1).position(|b| *b == b']') {
72 None => res.extend(b"\\["),
80 None => res.extend(b"\\["),
73 Some(end) => {
81 Some(end) => {
74 // Account for the one we skipped
82 // Account for the one we skipped
75 let end = end + 1;
83 let end = end + 1;
76
84
77 res.extend(b"[");
85 res.extend(b"[");
78
86
79 for (i, b) in input[..end].iter().enumerate() {
87 for (i, b) in input[..end].iter().enumerate() {
80 if *b == b'!' && i == 0 {
88 if *b == b'!' && i == 0 {
81 res.extend(b"^")
89 res.extend(b"^")
82 } else if *b == b'^' && i == 0 {
90 } else if *b == b'^' && i == 0 {
83 res.extend(b"\\^")
91 res.extend(b"\\^")
84 } else if *b == b'\\' {
92 } else if *b == b'\\' {
85 res.extend(b"\\\\")
93 res.extend(b"\\\\")
86 } else {
94 } else {
87 res.push(*b)
95 res.push(*b)
88 }
96 }
89 }
97 }
90 res.extend(b"]");
98 res.extend(b"]");
91 input = &input[end + 1..];
99 input = &input[end + 1..];
92 }
100 }
93 }
101 }
94 }
102 }
95 b'{' => {
103 b'{' => {
96 group_depth += 1;
104 group_depth += 1;
97 res.extend(b"(?:")
105 res.extend(b"(?:")
98 }
106 }
99 b'}' if group_depth > 0 => {
107 b'}' if group_depth > 0 => {
100 group_depth -= 1;
108 group_depth -= 1;
101 res.extend(b")");
109 res.extend(b")");
102 }
110 }
103 b',' if group_depth > 0 => res.extend(b"|"),
111 b',' if group_depth > 0 => res.extend(b"|"),
104 b'\\' => {
112 b'\\' => {
105 let c = {
113 let c = {
106 if let Some((c, rest)) = input.split_first() {
114 if let Some((c, rest)) = input.split_first() {
107 input = rest;
115 input = rest;
108 c
116 c
109 } else {
117 } else {
110 c
118 c
111 }
119 }
112 };
120 };
113 res.extend(&RE_ESCAPE[*c as usize])
121 res.extend(&RE_ESCAPE[*c as usize])
114 }
122 }
115 _ => res.extend(&RE_ESCAPE[*c as usize]),
123 _ => res.extend(&RE_ESCAPE[*c as usize]),
116 }
124 }
117 }
125 }
118 res
126 res
119 }
127 }
120
128
121 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
129 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
122 pattern
130 pattern
123 .iter()
131 .iter()
124 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
132 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
125 .collect()
133 .collect()
126 }
134 }
127
135
128 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
136 pub fn parse_pattern_syntax(
137 kind: &[u8],
138 ) -> Result<PatternSyntax, PatternError> {
129 match kind {
139 match kind {
130 b"re" => Ok(PatternSyntax::Regexp),
140 b"re:" => Ok(PatternSyntax::Regexp),
131 b"path" => Ok(PatternSyntax::Path),
141 b"path:" => Ok(PatternSyntax::Path),
132 b"relpath" => Ok(PatternSyntax::RelPath),
142 b"relpath:" => Ok(PatternSyntax::RelPath),
133 b"rootfilesin" => Ok(PatternSyntax::RootFiles),
143 b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
134 b"relglob" => Ok(PatternSyntax::RelGlob),
144 b"relglob:" => Ok(PatternSyntax::RelGlob),
135 b"relre" => Ok(PatternSyntax::RelRegexp),
145 b"relre:" => Ok(PatternSyntax::RelRegexp),
136 b"glob" => Ok(PatternSyntax::Glob),
146 b"glob:" => Ok(PatternSyntax::Glob),
137 b"rootglob" => Ok(PatternSyntax::RootGlob),
147 b"rootglob:" => Ok(PatternSyntax::RootGlob),
138 _ => Err(PatternError::UnsupportedSyntax(
148 _ => Err(PatternError::UnsupportedSyntax(
139 String::from_utf8_lossy(kind).to_string(),
149 String::from_utf8_lossy(kind).to_string(),
140 )),
150 )),
141 }
151 }
142 }
152 }
143
153
144 /// Builds the regex that corresponds to the given pattern.
154 /// Builds the regex that corresponds to the given pattern.
145 /// If within a `syntax: regexp` context, returns the pattern,
155 /// If within a `syntax: regexp` context, returns the pattern,
146 /// otherwise, returns the corresponding regex.
156 /// otherwise, returns the corresponding regex.
147 fn _build_single_regex(
157 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
148 syntax: PatternSyntax,
158 let IgnorePattern {
149 pattern: &[u8],
159 syntax, pattern, ..
150 globsuffix: &[u8],
160 } = entry;
151 ) -> Vec<u8> {
152 if pattern.is_empty() {
161 if pattern.is_empty() {
153 return vec![];
162 return vec![];
154 }
163 }
155 match syntax {
164 match syntax {
156 PatternSyntax::Regexp => pattern.to_owned(),
165 PatternSyntax::Regexp => pattern.to_owned(),
157 PatternSyntax::RelRegexp => {
166 PatternSyntax::RelRegexp => {
158 if pattern[0] == b'^' {
167 if pattern[0] == b'^' {
159 return pattern.to_owned();
168 return pattern.to_owned();
160 }
169 }
161 [b".*", pattern].concat()
170 [&b".*"[..], pattern].concat()
162 }
171 }
163 PatternSyntax::Path | PatternSyntax::RelPath => {
172 PatternSyntax::Path | PatternSyntax::RelPath => {
164 if pattern == b"." {
173 if pattern == b"." {
165 return vec![];
174 return vec![];
166 }
175 }
167 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
176 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
168 }
177 }
169 PatternSyntax::RootFiles => {
178 PatternSyntax::RootFiles => {
170 let mut res = if pattern == b"." {
179 let mut res = if pattern == b"." {
171 vec![]
180 vec![]
172 } else {
181 } else {
173 // Pattern is a directory name.
182 // Pattern is a directory name.
174 [escape_pattern(pattern).as_slice(), b"/"].concat()
183 [escape_pattern(pattern).as_slice(), b"/"].concat()
175 };
184 };
176
185
177 // Anything after the pattern must be a non-directory.
186 // Anything after the pattern must be a non-directory.
178 res.extend(b"[^/]+$");
187 res.extend(b"[^/]+$");
179 res
188 res
180 }
189 }
181 PatternSyntax::RelGlob => {
190 PatternSyntax::RelGlob => {
182 let glob_re = glob_to_re(pattern);
191 let glob_re = glob_to_re(pattern);
183 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
192 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
184 [b".*", rest, globsuffix].concat()
193 [b".*", rest, GLOB_SUFFIX].concat()
185 } else {
194 } else {
186 [b"(?:|.*/)", glob_re.as_slice(), globsuffix].concat()
195 [b"(?:|.*/)", glob_re.as_slice(), GLOB_SUFFIX].concat()
187 }
196 }
188 }
197 }
189 PatternSyntax::Glob | PatternSyntax::RootGlob => {
198 PatternSyntax::Glob | PatternSyntax::RootGlob => {
190 [glob_to_re(pattern).as_slice(), globsuffix].concat()
199 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
191 }
200 }
192 }
201 }
193 }
202 }
194
203
195 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
204 const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
196 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
205 [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
197
206
207 /// TODO: support non-Unix platforms
208 #[cfg(unix)]
209 pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
210 if bytes.is_empty() {
211 return b".".to_vec();
212 }
213 let sep = b'/';
214
215 let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
216 if initial_slashes > 2 {
217 // POSIX allows one or two initial slashes, but treats three or more
218 // as single slash.
219 initial_slashes = 1;
220 }
221 let components = bytes
222 .split(|b| *b == sep)
223 .filter(|c| !(c.is_empty() || c == b"."))
224 .fold(vec![], |mut acc, component| {
225 if component != b".."
226 || (initial_slashes == 0 && acc.is_empty())
227 || (!acc.is_empty() && acc[acc.len() - 1] == b"..")
228 {
229 acc.push(component)
230 } else if !acc.is_empty() {
231 acc.pop();
232 }
233 acc
234 });
235 let mut new_bytes = components.join(&sep);
236
237 if initial_slashes > 0 {
238 let mut buf: Vec<_> = (0..initial_slashes).map(|_| sep).collect();
239 buf.extend(new_bytes);
240 new_bytes = buf;
241 }
242 if new_bytes.is_empty() {
243 b".".to_vec()
244 } else {
245 new_bytes
246 }
247 }
248
198 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
249 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
199 /// that don't need to be transformed into a regex.
250 /// that don't need to be transformed into a regex.
200 pub fn build_single_regex(
251 pub fn build_single_regex(
201 kind: &[u8],
252 entry: &IgnorePattern,
202 pat: &[u8],
203 globsuffix: &[u8],
204 ) -> Result<Vec<u8>, PatternError> {
253 ) -> Result<Vec<u8>, PatternError> {
205 let enum_kind = parse_pattern_syntax(kind)?;
254 let IgnorePattern {
206 if enum_kind == PatternSyntax::RootGlob
255 pattern, syntax, ..
207 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
256 } = entry;
257 let pattern = match syntax {
258 PatternSyntax::RootGlob
259 | PatternSyntax::Path
260 | PatternSyntax::RelGlob
261 | PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
262 _ => pattern.to_owned(),
263 };
264 if *syntax == PatternSyntax::RootGlob
265 && !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
208 {
266 {
209 let mut escaped = escape_pattern(pat);
267 let mut escaped = escape_pattern(&pattern);
210 escaped.extend(b"(?:/|$)");
268 escaped.extend(GLOB_SUFFIX);
211 Ok(escaped)
269 Ok(escaped)
212 } else {
270 } else {
213 Ok(_build_single_regex(enum_kind, pat, globsuffix))
271 let mut entry = entry.clone();
272 entry.pattern = pattern;
273 Ok(_build_single_regex(&entry))
214 }
274 }
215 }
275 }
216
276
217 lazy_static! {
277 lazy_static! {
218 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
278 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
219 let mut m = FastHashMap::default();
279 let mut m = FastHashMap::default();
220
280
221 m.insert(b"re".as_ref(), b"relre:".as_ref());
281 m.insert(b"re".as_ref(), b"relre:".as_ref());
222 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
282 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
223 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
283 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
224 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
284 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
225 m.insert(b"include".as_ref(), b"include".as_ref());
285 m.insert(b"include".as_ref(), b"include:".as_ref());
226 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
286 m.insert(b"subinclude".as_ref(), b"subinclude:".as_ref());
227 m
287 m
228 };
288 };
229 }
289 }
230
290
231 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
291 #[derive(Debug)]
232 type WarningTuple = (PathBuf, Vec<u8>);
292 pub enum PatternFileWarning {
293 /// (file path, syntax bytes)
294 InvalidSyntax(PathBuf, Vec<u8>),
295 /// File path
296 NoSuchFile(PathBuf),
297 }
233
298
234 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
299 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
235 lines: &[u8],
300 lines: &[u8],
236 file_path: P,
301 file_path: P,
237 warn: bool,
302 warn: bool,
238 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
303 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
239 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
304 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
240 let comment_escape_regex = Regex::new(r"\\#").unwrap();
305 let comment_escape_regex = Regex::new(r"\\#").unwrap();
241 let mut inputs: Vec<PatternTuple> = vec![];
306 let mut inputs: Vec<IgnorePattern> = vec![];
242 let mut warnings: Vec<WarningTuple> = vec![];
307 let mut warnings: Vec<PatternFileWarning> = vec![];
243
308
244 let mut current_syntax = b"relre:".as_ref();
309 let mut current_syntax = b"relre:".as_ref();
245
310
246 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
311 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
247 let line_number = line_number + 1;
312 let line_number = line_number + 1;
248
313
249 let line_buf;
314 let line_buf;
250 if line.contains(&b'#') {
315 if line.contains(&b'#') {
251 if let Some(cap) = comment_regex.captures(line) {
316 if let Some(cap) = comment_regex.captures(line) {
252 line = &line[..cap.get(1).unwrap().end()]
317 line = &line[..cap.get(1).unwrap().end()]
253 }
318 }
254 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
319 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
255 line = &line_buf;
320 line = &line_buf;
256 }
321 }
257
322
258 let mut line = line.trim_end();
323 let mut line = line.trim_end();
259
324
260 if line.is_empty() {
325 if line.is_empty() {
261 continue;
326 continue;
262 }
327 }
263
328
264 if let Some(syntax) = line.drop_prefix(b"syntax:") {
329 if let Some(syntax) = line.drop_prefix(b"syntax:") {
265 let syntax = syntax.trim();
330 let syntax = syntax.trim();
266
331
267 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
332 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
268 current_syntax = rel_syntax;
333 current_syntax = rel_syntax;
269 } else if warn {
334 } else if warn {
270 warnings
335 warnings.push(PatternFileWarning::InvalidSyntax(
271 .push((file_path.as_ref().to_owned(), syntax.to_owned()));
336 file_path.as_ref().to_owned(),
337 syntax.to_owned(),
338 ));
272 }
339 }
273 continue;
340 continue;
274 }
341 }
275
342
276 let mut line_syntax: &[u8] = &current_syntax;
343 let mut line_syntax: &[u8] = &current_syntax;
277
344
278 for (s, rels) in SYNTAXES.iter() {
345 for (s, rels) in SYNTAXES.iter() {
279 if let Some(rest) = line.drop_prefix(rels) {
346 if let Some(rest) = line.drop_prefix(rels) {
280 line_syntax = rels;
347 line_syntax = rels;
281 line = rest;
348 line = rest;
282 break;
349 break;
283 }
350 }
284 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
351 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
285 line_syntax = rels;
352 line_syntax = rels;
286 line = rest;
353 line = rest;
287 break;
354 break;
288 }
355 }
289 }
356 }
290
357
291 inputs.push((
358 inputs.push(IgnorePattern::new(
292 [line_syntax, line].concat(),
359 parse_pattern_syntax(&line_syntax).map_err(|e| match e {
293 line_number,
360 PatternError::UnsupportedSyntax(syntax) => {
294 line.to_owned(),
361 PatternError::UnsupportedSyntaxInFile(
362 syntax,
363 file_path.as_ref().to_string_lossy().into(),
364 line_number,
365 )
366 }
367 _ => e,
368 })?,
369 &line,
370 &file_path,
295 ));
371 ));
296 }
372 }
297 (inputs, warnings)
373 Ok((inputs, warnings))
298 }
374 }
299
375
300 pub fn read_pattern_file<P: AsRef<Path>>(
376 pub fn read_pattern_file<P: AsRef<Path>>(
301 file_path: P,
377 file_path: P,
302 warn: bool,
378 warn: bool,
303 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
379 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
304 let mut f = File::open(file_path.as_ref())?;
380 let mut f = match File::open(file_path.as_ref()) {
381 Ok(f) => Ok(f),
382 Err(e) => match e.kind() {
383 std::io::ErrorKind::NotFound => {
384 return Ok((
385 vec![],
386 vec![PatternFileWarning::NoSuchFile(
387 file_path.as_ref().to_owned(),
388 )],
389 ))
390 }
391 _ => Err(e),
392 },
393 }?;
305 let mut contents = Vec::new();
394 let mut contents = Vec::new();
306
395
307 f.read_to_end(&mut contents)?;
396 f.read_to_end(&mut contents)?;
308
397
309 Ok(parse_pattern_file_contents(&contents, file_path, warn))
398 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
399 }
400
401 /// Represents an entry in an "ignore" file.
402 #[derive(Debug, Eq, PartialEq, Clone)]
403 pub struct IgnorePattern {
404 pub syntax: PatternSyntax,
405 pub pattern: Vec<u8>,
406 pub source: PathBuf,
310 }
407 }
311
408
409 impl IgnorePattern {
410 pub fn new(
411 syntax: PatternSyntax,
412 pattern: &[u8],
413 source: impl AsRef<Path>,
414 ) -> Self {
415 Self {
416 syntax,
417 pattern: pattern.to_owned(),
418 source: source.as_ref().to_owned(),
419 }
420 }
421 }
422
423 pub type PatternResult<T> = Result<T, PatternError>;
424
312 #[cfg(test)]
425 #[cfg(test)]
313 mod tests {
426 mod tests {
314 use super::*;
427 use super::*;
428 use pretty_assertions::assert_eq;
315
429
316 #[test]
430 #[test]
317 fn escape_pattern_test() {
431 fn escape_pattern_test() {
318 let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
432 let untouched =
433 br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
319 assert_eq!(escape_pattern(untouched), untouched.to_vec());
434 assert_eq!(escape_pattern(untouched), untouched.to_vec());
320 // All escape codes
435 // All escape codes
321 assert_eq!(
436 assert_eq!(
322 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
437 escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
323 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
438 br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
324 .to_vec()
439 .to_vec()
325 );
440 );
326 }
441 }
327
442
328 #[test]
443 #[test]
329 fn glob_test() {
444 fn glob_test() {
330 assert_eq!(glob_to_re(br#"?"#), br#"."#);
445 assert_eq!(glob_to_re(br#"?"#), br#"."#);
331 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
446 assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
332 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
447 assert_eq!(glob_to_re(br#"**"#), br#".*"#);
333 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
448 assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
334 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
449 assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
335 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
450 assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
336 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
451 assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
337 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
452 assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
338 }
453 }
339
454
340 #[test]
455 #[test]
341 fn test_parse_pattern_file_contents() {
456 fn test_parse_pattern_file_contents() {
342 let lines = b"syntax: glob\n*.elc";
457 let lines = b"syntax: glob\n*.elc";
343
458
344 assert_eq!(
459 assert_eq!(
345 vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
346 parse_pattern_file_contents(lines, Path::new("file_path"), false)
460 parse_pattern_file_contents(lines, Path::new("file_path"), false)
461 .unwrap()
347 .0,
462 .0,
463 vec![IgnorePattern::new(
464 PatternSyntax::RelGlob,
465 b"*.elc",
466 Path::new("file_path")
467 )],
348 );
468 );
349
469
350 let lines = b"syntax: include\nsyntax: glob";
470 let lines = b"syntax: include\nsyntax: glob";
351
471
352 assert_eq!(
472 assert_eq!(
353 parse_pattern_file_contents(lines, Path::new("file_path"), false)
473 parse_pattern_file_contents(lines, Path::new("file_path"), false)
474 .unwrap()
354 .0,
475 .0,
355 vec![]
476 vec![]
356 );
477 );
357 let lines = b"glob:**.o";
478 let lines = b"glob:**.o";
358 assert_eq!(
479 assert_eq!(
359 parse_pattern_file_contents(lines, Path::new("file_path"), false)
480 parse_pattern_file_contents(lines, Path::new("file_path"), false)
481 .unwrap()
360 .0,
482 .0,
361 vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
483 vec![IgnorePattern::new(
484 PatternSyntax::RelGlob,
485 b"**.o",
486 Path::new("file_path")
487 )]
488 );
489 }
490
491 #[test]
492 fn test_build_single_regex() {
493 assert_eq!(
494 build_single_regex(&IgnorePattern::new(
495 PatternSyntax::RelGlob,
496 b"rust/target/",
497 Path::new("")
498 ))
499 .unwrap(),
500 br"(?:|.*/)rust/target(?:/|$)".to_vec(),
362 );
501 );
363 }
502 }
364
503
365 #[test]
504 #[test]
366 fn test_build_single_regex_shortcut() {
505 fn test_build_single_regex_shortcut() {
367 assert_eq!(
506 assert_eq!(
368 br"(?:/|$)".to_vec(),
507 build_single_regex(&IgnorePattern::new(
369 build_single_regex(b"rootglob", b"", b"").unwrap()
508 PatternSyntax::RootGlob,
509 b"",
510 Path::new("")
511 ))
512 .unwrap(),
513 br"\.(?:/|$)".to_vec(),
370 );
514 );
371 assert_eq!(
515 assert_eq!(
516 build_single_regex(&IgnorePattern::new(
517 PatternSyntax::RootGlob,
518 b"whatever",
519 Path::new("")
520 ))
521 .unwrap(),
372 br"whatever(?:/|$)".to_vec(),
522 br"whatever(?:/|$)".to_vec(),
373 build_single_regex(b"rootglob", b"whatever", b"").unwrap()
374 );
523 );
375 assert_eq!(
524 assert_eq!(
376 br"[^/]*\.o".to_vec(),
525 build_single_regex(&IgnorePattern::new(
377 build_single_regex(b"rootglob", b"*.o", b"").unwrap()
526 PatternSyntax::RootGlob,
527 b"*.o",
528 Path::new("")
529 ))
530 .unwrap(),
531 br"[^/]*\.o(?:/|$)".to_vec(),
378 );
532 );
379 }
533 }
380 }
534 }
@@ -1,143 +1,169 b''
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
1 // Copyright 2018-2020 Georges Racinet <georges.racinet@octobus.net>
2 // and Mercurial contributors
2 // and Mercurial contributors
3 //
3 //
4 // This software may be used and distributed according to the terms of the
4 // This software may be used and distributed according to the terms of the
5 // GNU General Public License version 2 or any later version.
5 // GNU General Public License version 2 or any later version.
6 mod ancestors;
6 mod ancestors;
7 pub mod dagops;
7 pub mod dagops;
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
8 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
9 mod dirstate;
9 mod dirstate;
10 pub mod discovery;
10 pub mod discovery;
11 pub mod testing; // unconditionally built, for use from integration tests
11 pub mod testing; // unconditionally built, for use from integration tests
12 pub use dirstate::{
12 pub use dirstate::{
13 dirs_multiset::{DirsMultiset, DirsMultisetIter},
13 dirs_multiset::{DirsMultiset, DirsMultisetIter},
14 dirstate_map::DirstateMap,
14 dirstate_map::DirstateMap,
15 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
15 parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
16 status::{status, StatusResult},
16 status::{status, StatusResult},
17 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
17 CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
18 StateMap, StateMapIter,
18 StateMap, StateMapIter,
19 };
19 };
20 mod filepatterns;
20 mod filepatterns;
21 pub mod matchers;
21 pub mod matchers;
22 pub mod revlog;
22 pub mod revlog;
23 pub use revlog::*;
23 pub use revlog::*;
24 pub mod utils;
24 pub mod utils;
25
25
26 use crate::utils::hg_path::{HgPathBuf, HgPathError};
26 use crate::utils::hg_path::{HgPathBuf, HgPathError};
27 pub use filepatterns::{
27 pub use filepatterns::{
28 build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
28 parse_pattern_syntax, read_pattern_file, IgnorePattern,
29 PatternFileWarning, PatternSyntax,
29 };
30 };
30 use std::collections::HashMap;
31 use std::collections::HashMap;
31 use twox_hash::RandomXxHashBuilder64;
32 use twox_hash::RandomXxHashBuilder64;
32
33
33 pub type LineNumber = usize;
34 pub type LineNumber = usize;
34
35
35 /// Rust's default hasher is too slow because it tries to prevent collision
36 /// Rust's default hasher is too slow because it tries to prevent collision
36 /// attacks. We are not concerned about those: if an ill-minded person has
37 /// attacks. We are not concerned about those: if an ill-minded person has
37 /// write access to your repository, you have other issues.
38 /// write access to your repository, you have other issues.
38 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
39 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
39
40
40 #[derive(Clone, Debug, PartialEq)]
41 #[derive(Clone, Debug, PartialEq)]
41 pub enum DirstateParseError {
42 pub enum DirstateParseError {
42 TooLittleData,
43 TooLittleData,
43 Overflow,
44 Overflow,
44 CorruptedEntry(String),
45 CorruptedEntry(String),
45 Damaged,
46 Damaged,
46 }
47 }
47
48
48 impl From<std::io::Error> for DirstateParseError {
49 impl From<std::io::Error> for DirstateParseError {
49 fn from(e: std::io::Error) -> Self {
50 fn from(e: std::io::Error) -> Self {
50 DirstateParseError::CorruptedEntry(e.to_string())
51 DirstateParseError::CorruptedEntry(e.to_string())
51 }
52 }
52 }
53 }
53
54
54 impl ToString for DirstateParseError {
55 impl ToString for DirstateParseError {
55 fn to_string(&self) -> String {
56 fn to_string(&self) -> String {
56 use crate::DirstateParseError::*;
57 use crate::DirstateParseError::*;
57 match self {
58 match self {
58 TooLittleData => "Too little data for dirstate.".to_string(),
59 TooLittleData => "Too little data for dirstate.".to_string(),
59 Overflow => "Overflow in dirstate.".to_string(),
60 Overflow => "Overflow in dirstate.".to_string(),
60 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
61 CorruptedEntry(e) => format!("Corrupted entry: {:?}.", e),
61 Damaged => "Dirstate appears to be damaged.".to_string(),
62 Damaged => "Dirstate appears to be damaged.".to_string(),
62 }
63 }
63 }
64 }
64 }
65 }
65
66
66 #[derive(Debug, PartialEq)]
67 #[derive(Debug, PartialEq)]
67 pub enum DirstatePackError {
68 pub enum DirstatePackError {
68 CorruptedEntry(String),
69 CorruptedEntry(String),
69 CorruptedParent,
70 CorruptedParent,
70 BadSize(usize, usize),
71 BadSize(usize, usize),
71 }
72 }
72
73
73 impl From<std::io::Error> for DirstatePackError {
74 impl From<std::io::Error> for DirstatePackError {
74 fn from(e: std::io::Error) -> Self {
75 fn from(e: std::io::Error) -> Self {
75 DirstatePackError::CorruptedEntry(e.to_string())
76 DirstatePackError::CorruptedEntry(e.to_string())
76 }
77 }
77 }
78 }
78 #[derive(Debug, PartialEq)]
79 #[derive(Debug, PartialEq)]
79 pub enum DirstateMapError {
80 pub enum DirstateMapError {
80 PathNotFound(HgPathBuf),
81 PathNotFound(HgPathBuf),
81 EmptyPath,
82 EmptyPath,
82 InvalidPath(HgPathError),
83 InvalidPath(HgPathError),
83 }
84 }
84
85
85 impl ToString for DirstateMapError {
86 impl ToString for DirstateMapError {
86 fn to_string(&self) -> String {
87 fn to_string(&self) -> String {
87 match self {
88 match self {
88 DirstateMapError::PathNotFound(_) => {
89 DirstateMapError::PathNotFound(_) => {
89 "expected a value, found none".to_string()
90 "expected a value, found none".to_string()
90 }
91 }
91 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
92 DirstateMapError::EmptyPath => "Overflow in dirstate.".to_string(),
92 DirstateMapError::InvalidPath(e) => e.to_string(),
93 DirstateMapError::InvalidPath(e) => e.to_string(),
93 }
94 }
94 }
95 }
95 }
96 }
96
97
97 pub enum DirstateError {
98 pub enum DirstateError {
98 Parse(DirstateParseError),
99 Parse(DirstateParseError),
99 Pack(DirstatePackError),
100 Pack(DirstatePackError),
100 Map(DirstateMapError),
101 Map(DirstateMapError),
101 IO(std::io::Error),
102 IO(std::io::Error),
102 }
103 }
103
104
104 impl From<DirstateParseError> for DirstateError {
105 impl From<DirstateParseError> for DirstateError {
105 fn from(e: DirstateParseError) -> Self {
106 fn from(e: DirstateParseError) -> Self {
106 DirstateError::Parse(e)
107 DirstateError::Parse(e)
107 }
108 }
108 }
109 }
109
110
110 impl From<DirstatePackError> for DirstateError {
111 impl From<DirstatePackError> for DirstateError {
111 fn from(e: DirstatePackError) -> Self {
112 fn from(e: DirstatePackError) -> Self {
112 DirstateError::Pack(e)
113 DirstateError::Pack(e)
113 }
114 }
114 }
115 }
115
116
116 #[derive(Debug)]
117 #[derive(Debug)]
117 pub enum PatternError {
118 pub enum PatternError {
119 Path(HgPathError),
118 UnsupportedSyntax(String),
120 UnsupportedSyntax(String),
121 UnsupportedSyntaxInFile(String, String, usize),
122 TooLong(usize),
123 IO(std::io::Error),
119 }
124 }
120
125
121 #[derive(Debug)]
126 impl ToString for PatternError {
122 pub enum PatternFileError {
127 fn to_string(&self) -> String {
123 IO(std::io::Error),
128 match self {
124 Pattern(PatternError, LineNumber),
129 PatternError::UnsupportedSyntax(syntax) => {
125 }
130 format!("Unsupported syntax {}", syntax)
126
131 }
127 impl From<std::io::Error> for PatternFileError {
132 PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
128 fn from(e: std::io::Error) -> Self {
133 format!(
129 PatternFileError::IO(e)
134 "{}:{}: unsupported syntax {}",
135 file_path, line, syntax
136 )
137 }
138 PatternError::TooLong(size) => {
139 format!("matcher pattern is too long ({} bytes)", size)
140 }
141 PatternError::IO(e) => e.to_string(),
142 PatternError::Path(e) => e.to_string(),
143 }
130 }
144 }
131 }
145 }
132
146
133 impl From<DirstateMapError> for DirstateError {
147 impl From<DirstateMapError> for DirstateError {
134 fn from(e: DirstateMapError) -> Self {
148 fn from(e: DirstateMapError) -> Self {
135 DirstateError::Map(e)
149 DirstateError::Map(e)
136 }
150 }
137 }
151 }
138
152
139 impl From<std::io::Error> for DirstateError {
153 impl From<std::io::Error> for DirstateError {
140 fn from(e: std::io::Error) -> Self {
154 fn from(e: std::io::Error) -> Self {
141 DirstateError::IO(e)
155 DirstateError::IO(e)
142 }
156 }
143 }
157 }
158
159 impl From<std::io::Error> for PatternError {
160 fn from(e: std::io::Error) -> Self {
161 PatternError::IO(e)
162 }
163 }
164
165 impl From<HgPathError> for PatternError {
166 fn from(e: HgPathError) -> Self {
167 PatternError::Path(e)
168 }
169 }
General Comments 0
You need to be logged in to leave comments. Login now