##// END OF EJS Templates
rustfilepatterns: shorter code for concatenating slices...
Valentin Gatien-Baron -
r43133:69195b6f default
parent child Browse files
Show More
@@ -1,391 +1,377 b''
1 1 // filepatterns.rs
2 2 //
3 3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 4 //
5 5 // This software may be used and distributed according to the terms of the
6 6 // GNU General Public License version 2 or any later version.
7 7
8 8 //! Handling of Mercurial-specific patterns.
9 9
10 10 use crate::{
11 11 utils::{files::get_path_from_bytes, SliceExt},
12 12 LineNumber, PatternError, PatternFileError,
13 13 };
14 14 use lazy_static::lazy_static;
15 15 use regex::bytes::{NoExpand, Regex};
16 16 use std::collections::HashMap;
17 17 use std::fs::File;
18 18 use std::io::Read;
19 19 use std::vec::Vec;
20 20
21 21 lazy_static! {
22 22 static ref RE_ESCAPE: Vec<Vec<u8>> = {
23 23 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
24 24 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
25 25 for byte in to_escape {
26 26 v[*byte as usize].insert(0, b'\\');
27 27 }
28 28 v
29 29 };
30 30 }
31 31
/// Replacements for what follows a `*` in a glob, as pairs of
/// `(bytes following the star, regex to emit)`.
///
/// These are matched in order and the first matching prefix wins, so the
/// empty prefix has to stay last.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    (b"*/", b"(?:.*/)?"),
    (b"*", b".*"),
    (b"", b"[^/]*"),
];
35 35
/// The pattern syntaxes understood by this module, one per pattern kind
/// keyword.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// `re`: the pattern is already a regex
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// `path`
    Path,
    /// `relpath`
    RelPath,
    /// `relglob`
    RelGlob,
    /// `relre`
    RelRegexp,
    /// `rootfilesin`
    RootFiles,
}
50 50
51 51 /// Transforms a glob pattern into a regex
52 52 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
53 53 let mut input = pat;
54 54 let mut res: Vec<u8> = vec![];
55 55 let mut group_depth = 0;
56 56
57 57 while let Some((c, rest)) = input.split_first() {
58 58 input = rest;
59 59
60 60 match c {
61 61 b'*' => {
62 62 for (source, repl) in GLOB_REPLACEMENTS {
63 63 if let Some(rest) = input.drop_prefix(source) {
64 64 input = rest;
65 65 res.extend(*repl);
66 66 break;
67 67 }
68 68 }
69 69 }
70 70 b'?' => res.extend(b"."),
71 71 b'[' => {
72 72 match input.iter().skip(1).position(|b| *b == b']') {
73 73 None => res.extend(b"\\["),
74 74 Some(end) => {
75 75 // Account for the one we skipped
76 76 let end = end + 1;
77 77
78 78 res.extend(b"[");
79 79
80 80 for (i, b) in input[..end].iter().enumerate() {
81 81 if *b == b'!' && i == 0 {
82 82 res.extend(b"^")
83 83 } else if *b == b'^' && i == 0 {
84 84 res.extend(b"\\^")
85 85 } else if *b == b'\\' {
86 86 res.extend(b"\\\\")
87 87 } else {
88 88 res.push(*b)
89 89 }
90 90 }
91 91 res.extend(b"]");
92 92 input = &input[end + 1..];
93 93 }
94 94 }
95 95 }
96 96 b'{' => {
97 97 group_depth += 1;
98 98 res.extend(b"(?:")
99 99 }
100 100 b'}' if group_depth > 0 => {
101 101 group_depth -= 1;
102 102 res.extend(b")");
103 103 }
104 104 b',' if group_depth > 0 => res.extend(b"|"),
105 105 b'\\' => {
106 106 let c = {
107 107 if let Some((c, rest)) = input.split_first() {
108 108 input = rest;
109 109 c
110 110 } else {
111 111 c
112 112 }
113 113 };
114 114 res.extend(&RE_ESCAPE[*c as usize])
115 115 }
116 116 _ => res.extend(&RE_ESCAPE[*c as usize]),
117 117 }
118 118 }
119 119 res
120 120 }
121 121
122 122 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
123 123 pattern
124 124 .iter()
125 125 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
126 126 .collect()
127 127 }
128 128
129 129 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
130 130 match kind {
131 131 b"re" => Ok(PatternSyntax::Regexp),
132 132 b"path" => Ok(PatternSyntax::Path),
133 133 b"relpath" => Ok(PatternSyntax::RelPath),
134 134 b"rootfilesin" => Ok(PatternSyntax::RootFiles),
135 135 b"relglob" => Ok(PatternSyntax::RelGlob),
136 136 b"relre" => Ok(PatternSyntax::RelRegexp),
137 137 b"glob" => Ok(PatternSyntax::Glob),
138 138 b"rootglob" => Ok(PatternSyntax::RootGlob),
139 139 _ => Err(PatternError::UnsupportedSyntax(
140 140 String::from_utf8_lossy(kind).to_string(),
141 141 )),
142 142 }
143 143 }
144 144
145 145 /// Builds the regex that corresponds to the given pattern.
146 146 /// If within a `syntax: regexp` context, returns the pattern,
147 147 /// otherwise, returns the corresponding regex.
148 148 fn _build_single_regex(
149 149 syntax: PatternSyntax,
150 150 pattern: &[u8],
151 151 globsuffix: &[u8],
152 152 ) -> Vec<u8> {
153 153 if pattern.is_empty() {
154 154 return vec![];
155 155 }
156 156 match syntax {
157 157 PatternSyntax::Regexp => pattern.to_owned(),
158 158 PatternSyntax::RelRegexp => {
159 159 if pattern[0] == b'^' {
160 160 return pattern.to_owned();
161 161 }
162 let mut res = b".*".to_vec();
163 res.extend(pattern);
164 res
162 [b".*", pattern].concat()
165 163 }
166 164 PatternSyntax::Path | PatternSyntax::RelPath => {
167 165 if pattern == b"." {
168 166 return vec![];
169 167 }
170 let mut pattern = escape_pattern(pattern);
171 pattern.extend(b"(?:/|$)");
172 pattern
168 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
173 169 }
174 170 PatternSyntax::RootFiles => {
175 171 let mut res = if pattern == b"." {
176 172 vec![]
177 173 } else {
178 174 // Pattern is a directory name.
179 let mut as_vec: Vec<u8> = escape_pattern(pattern);
180 as_vec.push(b'/');
181 as_vec
175 [escape_pattern(pattern).as_slice(), b"/"].concat()
182 176 };
183 177
184 178 // Anything after the pattern must be a non-directory.
185 179 res.extend(b"[^/]+$");
186 180 res
187 181 }
188 182 PatternSyntax::RelGlob => {
189 let mut res: Vec<u8> = vec![];
190 183 let glob_re = glob_to_re(pattern);
191 184 if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
192 res.extend(b".*");
193 res.extend(rest);
185 [b".*", rest, globsuffix].concat()
194 186 } else {
195 res.extend(b"(?:|.*/)");
196 res.extend(glob_re);
187 [b"(?:|.*/)", glob_re.as_slice(), globsuffix].concat()
197 188 }
198 res.extend(globsuffix.iter());
199 res
200 189 }
201 190 PatternSyntax::Glob | PatternSyntax::RootGlob => {
202 let mut res: Vec<u8> = vec![];
203 res.extend(glob_to_re(pattern));
204 res.extend(globsuffix.iter());
205 res
191 [glob_to_re(pattern).as_slice(), globsuffix].concat()
206 192 }
207 193 }
208 194 }
209 195
/// Bytes that carry a special meaning in a glob. A pattern containing none
/// of them is treated as an exact path by `build_single_regex`.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
212 198
213 199 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
214 200 /// that don't need to be transformed into a regex.
215 201 pub fn build_single_regex(
216 202 kind: &[u8],
217 203 pat: &[u8],
218 204 globsuffix: &[u8],
219 205 ) -> Result<Vec<u8>, PatternError> {
220 206 let enum_kind = parse_pattern_syntax(kind)?;
221 207 if enum_kind == PatternSyntax::RootGlob
222 208 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
223 209 {
224 210 let mut escaped = escape_pattern(pat);
225 211 escaped.extend(b"(?:/|$)");
226 212 Ok(escaped)
227 213 } else {
228 214 Ok(_build_single_regex(enum_kind, pat, globsuffix))
229 215 }
230 216 }
231 217
232 218 lazy_static! {
233 219 static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
234 220 let mut m = HashMap::new();
235 221
236 222 m.insert(b"re".as_ref(), b"relre:".as_ref());
237 223 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
238 224 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
239 225 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
240 226 m.insert(b"include".as_ref(), b"include".as_ref());
241 227 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
242 228 m
243 229 };
244 230 }
245 231
246 232 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
247 233 type WarningTuple = (Vec<u8>, Vec<u8>);
248 234
249 235 pub fn parse_pattern_file_contents(
250 236 lines: &[u8],
251 237 file_path: &[u8],
252 238 warn: bool,
253 239 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
254 240 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
255 241 let comment_escape_regex = Regex::new(r"\\#").unwrap();
256 242 let mut inputs: Vec<PatternTuple> = vec![];
257 243 let mut warnings: Vec<WarningTuple> = vec![];
258 244
259 245 let mut current_syntax = b"relre:".as_ref();
260 246
261 247 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
262 248 let line_number = line_number + 1;
263 249
264 250 let line_buf;
265 251 if line.contains(&b'#') {
266 252 if let Some(cap) = comment_regex.captures(line) {
267 253 line = &line[..cap.get(1).unwrap().end()]
268 254 }
269 255 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
270 256 line = &line_buf;
271 257 }
272 258
273 259 let mut line = line.trim_end();
274 260
275 261 if line.is_empty() {
276 262 continue;
277 263 }
278 264
279 265 if let Some(syntax) = line.drop_prefix(b"syntax:") {
280 266 let syntax = syntax.trim();
281 267
282 268 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
283 269 current_syntax = rel_syntax;
284 270 } else if warn {
285 271 warnings.push((file_path.to_owned(), syntax.to_owned()));
286 272 }
287 273 continue;
288 274 }
289 275
290 276 let mut line_syntax: &[u8] = &current_syntax;
291 277
292 278 for (s, rels) in SYNTAXES.iter() {
293 279 if let Some(rest) = line.drop_prefix(rels) {
294 280 line_syntax = rels;
295 281 line = rest;
296 282 break;
297 283 }
298 284 if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
299 285 line_syntax = rels;
300 286 line = rest;
301 287 break;
302 288 }
303 289 }
304 290
305 291 inputs.push((
306 292 [line_syntax, line].concat(),
307 293 line_number,
308 294 line.to_owned(),
309 295 ));
310 296 }
311 297 (inputs, warnings)
312 298 }
313 299
314 300 pub fn read_pattern_file(
315 301 file_path: &[u8],
316 302 warn: bool,
317 303 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
318 304 let mut f = File::open(get_path_from_bytes(file_path))?;
319 305 let mut contents = Vec::new();
320 306
321 307 f.read_to_end(&mut contents)?;
322 308
323 309 Ok(parse_pattern_file_contents(&contents, file_path, warn))
324 310 }
325 311
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes outside the escape set are copied as-is; the others gain a
    /// leading backslash.
    #[test]
    fn escape_pattern_test() {
        let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());

        // All escape codes (raw literal: `\t` here is a backslash and a `t`)
        let input = br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#;
        let expected =
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#;
        assert_eq!(escape_pattern(input), expected.to_vec());
    }

    /// Each glob construct translates to the expected regex fragment.
    #[test]
    fn glob_test() {
        let cases: &[(&[u8], &[u8])] = &[
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];
        for (glob, expected) in cases {
            assert_eq!(glob_to_re(glob), expected.to_vec());
        }
    }

    /// `syntax:` lines and per-line prefixes both select the pattern prefix.
    #[test]
    fn test_parse_pattern_file_contents() {
        let contents = b"syntax: glob\n*.elc";
        let (patterns, _) =
            parse_pattern_file_contents(contents, b"file_path", false);
        assert_eq!(
            vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
            patterns,
        );

        // Syntax lines alone produce no pattern
        let contents = b"syntax: include\nsyntax: glob";
        let (patterns, _) =
            parse_pattern_file_contents(contents, b"file_path", false);
        assert_eq!(patterns, vec![]);

        let contents = b"glob:**.o";
        let (patterns, _) =
            parse_pattern_file_contents(contents, b"file_path", false);
        assert_eq!(
            patterns,
            vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
        );
    }

    /// Only `rootglob` patterns without special characters take the
    /// shortcut.
    #[test]
    fn test_build_single_regex_shortcut() {
        let empty = build_single_regex(b"rootglob", b"", b"").unwrap();
        assert_eq!(br"(?:/|$)".to_vec(), empty);

        let exact = build_single_regex(b"rootglob", b"whatever", b"").unwrap();
        assert_eq!(br"whatever(?:/|$)".to_vec(), exact);

        let globbed = build_single_regex(b"rootglob", b"*.o", b"").unwrap();
        assert_eq!(br"[^/]*\.o".to_vec(), globbed);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now