##// END OF EJS Templates
rust-filepatterns: silence warning of non_upper_case_globals
Yuya Nishihara -
r42683:a4a468b0 default
parent child Browse files
Show More
@@ -1,372 +1,372 b''
1 1 use crate::{LineNumber, PatternError, PatternFileError};
2 2 use regex::bytes::Regex;
3 3 use std::collections::HashMap;
4 4 use std::fs::File;
5 5 use std::io::Read;
6 6 use std::vec::Vec;
7 7 use utils::files::get_path_from_bytes;
8 8 use utils::{replace_slice, SliceExt};
9 9
10 10 lazy_static! {
11 static ref reescape: Vec<Vec<u8>> = {
11 static ref RE_ESCAPE: Vec<Vec<u8>> = {
12 12 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
13 13 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
14 14 for byte in to_escape {
15 15 v[*byte as usize].insert(0, b'\\');
16 16 }
17 17 v
18 18 };
19 19 }
20 20
/// Rewrites applied after a `*` has been read from a glob, as
/// `(bytes that must follow, regex to emit)` pairs.
///
/// These are matched in order; the empty prefix of the last entry matches
/// any input.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    (b"*/", b"(?:.*/)?"),
    (b"*", b".*"),
    (b"", b"[^/]*"),
];
24 24
/// The pattern syntaxes this module knows how to turn into a regex.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PatternSyntax {
    /// `re`: the pattern is already a regex and is used unchanged.
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at slashes)
    Glob,
    /// `path`: the pattern is escaped literally and must be followed by `/`
    /// or the end of the input.
    Path,
    /// `relpath`: built exactly like `Path`.
    RelPath,
    /// `relglob`: a glob whose regex is prefixed with `(?:|.*/)`.
    RelGlob,
    /// `relre`: a regex that gets a leading `.*` unless it starts with `^`.
    RelRegexp,
    /// `rootfilesin`: the pattern is a directory name; the regex matches
    /// entries directly inside it (`[^/]+$` after the directory).
    RootFiles,
}
38 38
39 39 /// Transforms a glob pattern into a regex
40 40 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
41 41 let mut input = pat;
42 42 let mut res: Vec<u8> = vec![];
43 43 let mut group_depth = 0;
44 44
45 45 while let Some((c, rest)) = input.split_first() {
46 46 input = rest;
47 47
48 48 match c {
49 49 b'*' => {
50 50 for (source, repl) in GLOB_REPLACEMENTS {
51 51 if input.starts_with(source) {
52 52 input = &input[source.len()..];
53 53 res.extend(*repl);
54 54 break;
55 55 }
56 56 }
57 57 }
58 58 b'?' => res.extend(b"."),
59 59 b'[' => {
60 60 match input.iter().skip(1).position(|b| *b == b']') {
61 61 None => res.extend(b"\\["),
62 62 Some(end) => {
63 63 // Account for the one we skipped
64 64 let end = end + 1;
65 65
66 66 res.extend(b"[");
67 67
68 68 for (i, b) in input[..end].iter().enumerate() {
69 69 if *b == b'!' && i == 0 {
70 70 res.extend(b"^")
71 71 } else if *b == b'^' && i == 0 {
72 72 res.extend(b"\\^")
73 73 } else if *b == b'\\' {
74 74 res.extend(b"\\\\")
75 75 } else {
76 76 res.push(*b)
77 77 }
78 78 }
79 79 res.extend(b"]");
80 80 input = &input[end + 1..];
81 81 }
82 82 }
83 83 }
84 84 b'{' => {
85 85 group_depth += 1;
86 86 res.extend(b"(?:")
87 87 }
88 88 b'}' if group_depth > 0 => {
89 89 group_depth -= 1;
90 90 res.extend(b")");
91 91 }
92 92 b',' if group_depth > 0 => res.extend(b"|"),
93 93 b'\\' => {
94 94 let c = {
95 95 if let Some((c, rest)) = input.split_first() {
96 96 input = rest;
97 97 c
98 98 } else {
99 99 c
100 100 }
101 101 };
102 res.extend(&reescape[*c as usize])
102 res.extend(&RE_ESCAPE[*c as usize])
103 103 }
104 _ => res.extend(&reescape[*c as usize]),
104 _ => res.extend(&RE_ESCAPE[*c as usize]),
105 105 }
106 106 }
107 107 res
108 108 }
109 109
110 110 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
111 111 pattern
112 112 .iter()
113 .flat_map(|c| reescape[*c as usize].clone())
113 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
114 114 .collect()
115 115 }
116 116
117 117 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
118 118 match kind {
119 119 b"re" => Ok(PatternSyntax::Regexp),
120 120 b"path" => Ok(PatternSyntax::Path),
121 121 b"relpath" => Ok(PatternSyntax::RelPath),
122 122 b"rootfilesin" => Ok(PatternSyntax::RootFiles),
123 123 b"relglob" => Ok(PatternSyntax::RelGlob),
124 124 b"relre" => Ok(PatternSyntax::RelRegexp),
125 125 b"glob" => Ok(PatternSyntax::Glob),
126 126 b"rootglob" => Ok(PatternSyntax::RootGlob),
127 127 _ => Err(PatternError::UnsupportedSyntax(
128 128 String::from_utf8_lossy(kind).to_string(),
129 129 )),
130 130 }
131 131 }
132 132
133 133 /// Builds the regex that corresponds to the given pattern.
134 134 /// If within a `syntax: regexp` context, returns the pattern,
135 135 /// otherwise, returns the corresponding regex.
136 136 fn _build_single_regex(
137 137 syntax: PatternSyntax,
138 138 pattern: &[u8],
139 139 globsuffix: &[u8],
140 140 ) -> Vec<u8> {
141 141 if pattern.is_empty() {
142 142 return vec![];
143 143 }
144 144 match syntax {
145 145 PatternSyntax::Regexp => pattern.to_owned(),
146 146 PatternSyntax::RelRegexp => {
147 147 if pattern[0] == b'^' {
148 148 return pattern.to_owned();
149 149 }
150 150 let mut res = b".*".to_vec();
151 151 res.extend(pattern);
152 152 res
153 153 }
154 154 PatternSyntax::Path | PatternSyntax::RelPath => {
155 155 if pattern == b"." {
156 156 return vec![];
157 157 }
158 158 let mut pattern = escape_pattern(pattern);
159 159 pattern.extend(b"(?:/|$)");
160 160 pattern
161 161 }
162 162 PatternSyntax::RootFiles => {
163 163 let mut res = if pattern == b"." {
164 164 vec![]
165 165 } else {
166 166 // Pattern is a directory name.
167 167 let mut as_vec: Vec<u8> = escape_pattern(pattern);
168 168 as_vec.push(b'/');
169 169 as_vec
170 170 };
171 171
172 172 // Anything after the pattern must be a non-directory.
173 173 res.extend(b"[^/]+$");
174 174 res
175 175 }
176 176 PatternSyntax::Glob
177 177 | PatternSyntax::RelGlob
178 178 | PatternSyntax::RootGlob => {
179 179 let mut res: Vec<u8> = vec![];
180 180 if syntax == PatternSyntax::RelGlob {
181 181 res.extend(b"(?:|.*/)");
182 182 }
183 183
184 184 res.extend(glob_to_re(pattern));
185 185 res.extend(globsuffix.iter());
186 186 res
187 187 }
188 188 }
189 189 }
190 190
/// Bytes whose presence means a glob cannot be treated as plain literal text.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
193 193
194 194 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
195 195 /// that don't need to be transformed into a regex.
196 196 pub fn build_single_regex(
197 197 kind: &[u8],
198 198 pat: &[u8],
199 199 globsuffix: &[u8],
200 200 ) -> Result<Vec<u8>, PatternError> {
201 201 let enum_kind = parse_pattern_syntax(kind)?;
202 202 if enum_kind == PatternSyntax::RootGlob
203 203 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
204 204 {
205 205 let mut escaped = escape_pattern(pat);
206 206 escaped.extend(b"(?:/|$)");
207 207 Ok(escaped)
208 208 } else {
209 209 Ok(_build_single_regex(enum_kind, pat, globsuffix))
210 210 }
211 211 }
212 212
213 213 lazy_static! {
214 214 static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
215 215 let mut m = HashMap::new();
216 216
217 217 m.insert(b"re".as_ref(), b"relre:".as_ref());
218 218 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
219 219 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
220 220 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
221 221 m.insert(b"include".as_ref(), b"include".as_ref());
222 222 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
223 223 m
224 224 };
225 225 }
226 226
227 227 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
228 228 type WarningTuple = (String, String);
229 229
230 230 pub fn parse_pattern_file_contents(
231 231 lines: &[u8],
232 232 file_path: &[u8],
233 233 warn: bool,
234 234 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
235 235 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
236 236 let mut inputs: Vec<PatternTuple> = vec![];
237 237 let mut warnings: Vec<WarningTuple> = vec![];
238 238
239 239 let mut current_syntax = b"relre:".as_ref();
240 240
241 241 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
242 242 let line_number = line_number + 1;
243 243
244 244 if line.contains(&('#' as u8)) {
245 245 if let Some(cap) = comment_regex.captures(line) {
246 246 line = &line[..cap.get(1).unwrap().end()]
247 247 }
248 248 let mut line = line.to_owned();
249 249 replace_slice(&mut line, br"\#", b"#");
250 250 }
251 251
252 252 let mut line = line.trim_end();
253 253
254 254 if line.is_empty() {
255 255 continue;
256 256 }
257 257
258 258 if line.starts_with(b"syntax:") {
259 259 let syntax = line[b"syntax:".len()..].trim();
260 260
261 261 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
262 262 current_syntax = rel_syntax;
263 263 } else if warn {
264 264 warnings.push((
265 265 String::from_utf8_lossy(file_path).to_string(),
266 266 String::from_utf8_lossy(syntax).to_string(),
267 267 ));
268 268 }
269 269 continue;
270 270 }
271 271
272 272 let mut line_syntax: &[u8] = &current_syntax;
273 273
274 274 for (s, rels) in SYNTAXES.iter() {
275 275 if line.starts_with(rels) {
276 276 line_syntax = rels;
277 277 line = &line[rels.len()..];
278 278 break;
279 279 } else if line.starts_with(&[s, b":".as_ref()].concat()) {
280 280 line_syntax = rels;
281 281 line = &line[s.len() + 1..];
282 282 break;
283 283 }
284 284 }
285 285
286 286 inputs.push((
287 287 [line_syntax, line].concat(),
288 288 line_number,
289 289 line.to_owned(),
290 290 ));
291 291 }
292 292 (inputs, warnings)
293 293 }
294 294
295 295 pub fn read_pattern_file(
296 296 file_path: &[u8],
297 297 warn: bool,
298 298 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
299 299 let mut f = File::open(get_path_from_bytes(file_path))?;
300 300 let mut contents = Vec::new();
301 301
302 302 f.read_to_end(&mut contents)?;
303 303
304 304 Ok(parse_pattern_file_contents(&contents, file_path, warn))
305 305 }
306 306
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes outside the escape set pass through; the others gain a backslash.
    #[test]
    fn escape_pattern_test() {
        let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());

        // All escape codes
        let special = br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#;
        let expected =
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#;
        assert_eq!(escape_pattern(special), expected.to_vec());
    }

    /// Each glob construct is translated to the expected regex fragment.
    #[test]
    fn glob_test() {
        let cases: &[(&[u8], &[u8])] = &[
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];

        for (glob, expected) in cases {
            assert_eq!(glob_to_re(glob), expected.to_vec());
        }
    }

    /// `syntax:` lines and inline prefixes select the stored syntax.
    #[test]
    fn test_parse_pattern_file_contents() {
        let parse = |contents: &[u8]| {
            parse_pattern_file_contents(contents, b"file_path", false).0
        };

        assert_eq!(
            parse(b"syntax: glob\n*.elc"),
            vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())]
        );

        // Syntax lines alone produce no pattern.
        assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);

        assert_eq!(
            parse(b"glob:**.o"),
            vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
        );
    }

    /// Literal `rootglob` patterns take the shortcut; real globs do not.
    #[test]
    fn test_build_single_regex_shortcut() {
        let cases: &[(&[u8], &[u8])] = &[
            (b"", br"(?:/|$)"),
            (b"whatever", br"whatever(?:/|$)"),
            (b"*.o", br"[^/]*\.o"),
        ];

        for (pattern, expected) in cases {
            assert_eq!(
                build_single_regex(b"rootglob", pattern, b"").unwrap(),
                expected.to_vec()
            );
        }
    }
}
General Comments 0
You need to be logged in to leave comments. Login now