##// END OF EJS Templates
rust-docstrings: add missing module docstrings...
Raphaël Gomès -
r42978:0def0fcb default draft
parent child Browse files
Show More
@@ -1,373 +1,382 b''
1 // filepatterns.rs
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! Handling of Mercurial-specific patterns.
9
1 10 use crate::{
2 11 utils::{files::get_path_from_bytes, SliceExt},
3 12 LineNumber, PatternError, PatternFileError,
4 13 };
5 14 use lazy_static::lazy_static;
6 15 use regex::bytes::{NoExpand, Regex};
7 16 use std::collections::HashMap;
8 17 use std::fs::File;
9 18 use std::io::Read;
10 19 use std::vec::Vec;
11 20
12 21 lazy_static! {
13 22 static ref RE_ESCAPE: Vec<Vec<u8>> = {
14 23 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
15 24 let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
16 25 for byte in to_escape {
17 26 v[*byte as usize].insert(0, b'\\');
18 27 }
19 28 v
20 29 };
21 30 }
22 31
/// Candidate expansions for what follows a `*` in a glob, as
/// `(bytes following the `*`, regex to emit)` pairs.
///
/// These are matched in order and the first hit wins; the last entry has an
/// empty prefix, so it always matches.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    (b"*/", b"(?:.*/)?"),
    (b"*", b".*"),
    (b"", b"[^/]*"),
];
26 35
/// The pattern syntaxes understood by this module.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// The pattern is already a regular expression and is used unchanged
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at slashes)
    Glob,
    /// Literal path, matching the path itself or anything below it
    Path,
    /// Literal path; turned into a regex exactly like `Path`
    RelPath,
    /// Glob whose regex is prefixed with `(?:|.*/)`
    RelGlob,
    /// Regular expression that gets `.*` prepended unless it starts with `^`
    RelRegexp,
    /// Directory name; matches entries directly inside that directory only
    RootFiles,
}
40 49
41 50 /// Transforms a glob pattern into a regex
42 51 fn glob_to_re(pat: &[u8]) -> Vec<u8> {
43 52 let mut input = pat;
44 53 let mut res: Vec<u8> = vec![];
45 54 let mut group_depth = 0;
46 55
47 56 while let Some((c, rest)) = input.split_first() {
48 57 input = rest;
49 58
50 59 match c {
51 60 b'*' => {
52 61 for (source, repl) in GLOB_REPLACEMENTS {
53 62 if input.starts_with(source) {
54 63 input = &input[source.len()..];
55 64 res.extend(*repl);
56 65 break;
57 66 }
58 67 }
59 68 }
60 69 b'?' => res.extend(b"."),
61 70 b'[' => {
62 71 match input.iter().skip(1).position(|b| *b == b']') {
63 72 None => res.extend(b"\\["),
64 73 Some(end) => {
65 74 // Account for the one we skipped
66 75 let end = end + 1;
67 76
68 77 res.extend(b"[");
69 78
70 79 for (i, b) in input[..end].iter().enumerate() {
71 80 if *b == b'!' && i == 0 {
72 81 res.extend(b"^")
73 82 } else if *b == b'^' && i == 0 {
74 83 res.extend(b"\\^")
75 84 } else if *b == b'\\' {
76 85 res.extend(b"\\\\")
77 86 } else {
78 87 res.push(*b)
79 88 }
80 89 }
81 90 res.extend(b"]");
82 91 input = &input[end + 1..];
83 92 }
84 93 }
85 94 }
86 95 b'{' => {
87 96 group_depth += 1;
88 97 res.extend(b"(?:")
89 98 }
90 99 b'}' if group_depth > 0 => {
91 100 group_depth -= 1;
92 101 res.extend(b")");
93 102 }
94 103 b',' if group_depth > 0 => res.extend(b"|"),
95 104 b'\\' => {
96 105 let c = {
97 106 if let Some((c, rest)) = input.split_first() {
98 107 input = rest;
99 108 c
100 109 } else {
101 110 c
102 111 }
103 112 };
104 113 res.extend(&RE_ESCAPE[*c as usize])
105 114 }
106 115 _ => res.extend(&RE_ESCAPE[*c as usize]),
107 116 }
108 117 }
109 118 res
110 119 }
111 120
112 121 fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
113 122 pattern
114 123 .iter()
115 124 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
116 125 .collect()
117 126 }
118 127
119 128 fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
120 129 match kind {
121 130 b"re" => Ok(PatternSyntax::Regexp),
122 131 b"path" => Ok(PatternSyntax::Path),
123 132 b"relpath" => Ok(PatternSyntax::RelPath),
124 133 b"rootfilesin" => Ok(PatternSyntax::RootFiles),
125 134 b"relglob" => Ok(PatternSyntax::RelGlob),
126 135 b"relre" => Ok(PatternSyntax::RelRegexp),
127 136 b"glob" => Ok(PatternSyntax::Glob),
128 137 b"rootglob" => Ok(PatternSyntax::RootGlob),
129 138 _ => Err(PatternError::UnsupportedSyntax(
130 139 String::from_utf8_lossy(kind).to_string(),
131 140 )),
132 141 }
133 142 }
134 143
135 144 /// Builds the regex that corresponds to the given pattern.
136 145 /// If within a `syntax: regexp` context, returns the pattern,
137 146 /// otherwise, returns the corresponding regex.
138 147 fn _build_single_regex(
139 148 syntax: PatternSyntax,
140 149 pattern: &[u8],
141 150 globsuffix: &[u8],
142 151 ) -> Vec<u8> {
143 152 if pattern.is_empty() {
144 153 return vec![];
145 154 }
146 155 match syntax {
147 156 PatternSyntax::Regexp => pattern.to_owned(),
148 157 PatternSyntax::RelRegexp => {
149 158 if pattern[0] == b'^' {
150 159 return pattern.to_owned();
151 160 }
152 161 let mut res = b".*".to_vec();
153 162 res.extend(pattern);
154 163 res
155 164 }
156 165 PatternSyntax::Path | PatternSyntax::RelPath => {
157 166 if pattern == b"." {
158 167 return vec![];
159 168 }
160 169 let mut pattern = escape_pattern(pattern);
161 170 pattern.extend(b"(?:/|$)");
162 171 pattern
163 172 }
164 173 PatternSyntax::RootFiles => {
165 174 let mut res = if pattern == b"." {
166 175 vec![]
167 176 } else {
168 177 // Pattern is a directory name.
169 178 let mut as_vec: Vec<u8> = escape_pattern(pattern);
170 179 as_vec.push(b'/');
171 180 as_vec
172 181 };
173 182
174 183 // Anything after the pattern must be a non-directory.
175 184 res.extend(b"[^/]+$");
176 185 res
177 186 }
178 187 PatternSyntax::Glob
179 188 | PatternSyntax::RelGlob
180 189 | PatternSyntax::RootGlob => {
181 190 let mut res: Vec<u8> = vec![];
182 191 if syntax == PatternSyntax::RelGlob {
183 192 res.extend(b"(?:|.*/)");
184 193 }
185 194
186 195 res.extend(glob_to_re(pattern));
187 196 res.extend(globsuffix.iter());
188 197 res
189 198 }
190 199 }
191 200 }
192 201
/// Bytes that have a special meaning in a glob; a pattern containing none of
/// them is an "exact" glob.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
195 204
196 205 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
197 206 /// that don't need to be transformed into a regex.
198 207 pub fn build_single_regex(
199 208 kind: &[u8],
200 209 pat: &[u8],
201 210 globsuffix: &[u8],
202 211 ) -> Result<Vec<u8>, PatternError> {
203 212 let enum_kind = parse_pattern_syntax(kind)?;
204 213 if enum_kind == PatternSyntax::RootGlob
205 214 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
206 215 {
207 216 let mut escaped = escape_pattern(pat);
208 217 escaped.extend(b"(?:/|$)");
209 218 Ok(escaped)
210 219 } else {
211 220 Ok(_build_single_regex(enum_kind, pat, globsuffix))
212 221 }
213 222 }
214 223
215 224 lazy_static! {
216 225 static ref SYNTAXES: HashMap<&'static [u8], &'static [u8]> = {
217 226 let mut m = HashMap::new();
218 227
219 228 m.insert(b"re".as_ref(), b"relre:".as_ref());
220 229 m.insert(b"regexp".as_ref(), b"relre:".as_ref());
221 230 m.insert(b"glob".as_ref(), b"relglob:".as_ref());
222 231 m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
223 232 m.insert(b"include".as_ref(), b"include".as_ref());
224 233 m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
225 234 m
226 235 };
227 236 }
228 237
229 238 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
230 239 type WarningTuple = (Vec<u8>, Vec<u8>);
231 240
232 241 pub fn parse_pattern_file_contents(
233 242 lines: &[u8],
234 243 file_path: &[u8],
235 244 warn: bool,
236 245 ) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
237 246 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
238 247 let comment_escape_regex = Regex::new(r"\\#").unwrap();
239 248 let mut inputs: Vec<PatternTuple> = vec![];
240 249 let mut warnings: Vec<WarningTuple> = vec![];
241 250
242 251 let mut current_syntax = b"relre:".as_ref();
243 252
244 253 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
245 254 let line_number = line_number + 1;
246 255
247 256 let line_buf;
248 257 if line.contains(&b'#') {
249 258 if let Some(cap) = comment_regex.captures(line) {
250 259 line = &line[..cap.get(1).unwrap().end()]
251 260 }
252 261 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
253 262 line = &line_buf;
254 263 }
255 264
256 265 let mut line = line.trim_end();
257 266
258 267 if line.is_empty() {
259 268 continue;
260 269 }
261 270
262 271 if line.starts_with(b"syntax:") {
263 272 let syntax = line[b"syntax:".len()..].trim();
264 273
265 274 if let Some(rel_syntax) = SYNTAXES.get(syntax) {
266 275 current_syntax = rel_syntax;
267 276 } else if warn {
268 277 warnings.push((file_path.to_owned(), syntax.to_owned()));
269 278 }
270 279 continue;
271 280 }
272 281
273 282 let mut line_syntax: &[u8] = &current_syntax;
274 283
275 284 for (s, rels) in SYNTAXES.iter() {
276 285 if line.starts_with(rels) {
277 286 line_syntax = rels;
278 287 line = &line[rels.len()..];
279 288 break;
280 289 } else if line.starts_with(&[s, b":".as_ref()].concat()) {
281 290 line_syntax = rels;
282 291 line = &line[s.len() + 1..];
283 292 break;
284 293 }
285 294 }
286 295
287 296 inputs.push((
288 297 [line_syntax, line].concat(),
289 298 line_number,
290 299 line.to_owned(),
291 300 ));
292 301 }
293 302 (inputs, warnings)
294 303 }
295 304
296 305 pub fn read_pattern_file(
297 306 file_path: &[u8],
298 307 warn: bool,
299 308 ) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
300 309 let mut f = File::open(get_path_from_bytes(file_path))?;
301 310 let mut contents = Vec::new();
302 311
303 312 f.read_to_end(&mut contents)?;
304 313
305 314 Ok(parse_pattern_file_contents(&contents, file_path, warn))
306 315 }
307 316
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn escape_pattern_test() {
        // Bytes without any special meaning come out unchanged
        let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());

        // All escape codes
        let special = br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#;
        let expected =
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#;
        assert_eq!(escape_pattern(special), expected.to_vec());
    }

    #[test]
    fn glob_test() {
        // (glob, expected regex)
        let cases: &[(&[u8], &[u8])] = &[
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];
        for (glob, regex) in cases {
            assert_eq!(glob_to_re(glob), regex.to_vec());
        }
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let parse = |lines: &[u8]| {
            parse_pattern_file_contents(lines, b"file_path", false).0
        };

        // A `syntax:` line applies to the patterns that follow it
        assert_eq!(
            parse(b"syntax: glob\n*.elc"),
            vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())]
        );

        // `syntax:` lines alone do not produce any pattern
        assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);

        // Per-line syntax prefix
        assert_eq!(
            parse(b"glob:**.o"),
            vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // (rootglob pattern, expected regex)
        let cases: &[(&[u8], &[u8])] = &[
            (b"", br"(?:/|$)"),
            (b"whatever", br"whatever(?:/|$)"),
            (b"*.o", br"[^/]*\.o"),
        ];
        for (pat, expected) in cases {
            assert_eq!(
                build_single_regex(b"rootglob", pat, b"").unwrap(),
                expected.to_vec()
            );
        }
    }
}
@@ -1,92 +1,101 b''
1 // utils module
2 //
3 // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 //! Contains useful functions, traits, structs, etc. for use in core.
9
1 10 pub mod files;
2 11
3 12 use std::convert::AsMut;
4 13
/// Builds a default-initialized array of type `A` and fills it with a copy
/// of the elements of `slice`.
///
/// # Panics
///
/// Will panic if the slice and target array don't have the same length.
pub fn copy_into_array<A, T>(slice: &[T]) -> A
where
    A: Sized + Default + AsMut<[T]>,
    T: Copy,
{
    let mut array = A::default();
    array.as_mut().copy_from_slice(slice);
    array
}
19 28
/// Replaces the `from` slice with the `to` slice inside the `buf` slice.
///
/// Occurrences are replaced from left to right and do not overlap: once an
/// occurrence has been replaced, the search resumes right after it, so data
/// that was just written is never matched again.
///
/// The replacement is done in place, so `from` and `to` must have the same
/// length. `buf` is left untouched when they don't, when `from` is empty or
/// when `buf` is shorter than `from`.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::replace_slice;
/// let mut line = b"I hate writing tests!".to_vec();
/// replace_slice(&mut line, b"hate", b"love");
/// assert_eq!(
///     line,
///     b"I love writing tests!".to_vec()
/// );
/// ```
pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
where
    T: Clone + PartialEq,
{
    if from.is_empty() || buf.len() < from.len() || from.len() != to.len() {
        return;
    }
    let mut start = 0;
    while start + from.len() <= buf.len() {
        if buf[start..].starts_with(from) {
            buf[start..start + from.len()].clone_from_slice(to);
            // Skip what was just written so it cannot be matched again
            start += from.len();
        } else {
            start += 1;
        }
    }
}
47 56
/// Whitespace-trimming helpers for slices, similar to the `trim*` methods
/// of `str`.
pub trait SliceExt {
    /// Returns `self` without its trailing whitespace.
    fn trim_end(&self) -> &Self;
    /// Returns `self` without its leading whitespace.
    fn trim_start(&self) -> &Self;
    /// Returns `self` without its leading and trailing whitespace.
    fn trim(&self) -> &Self;
}

/// Returns `true` unless `c` is one of the ASCII whitespace bytes: space,
/// `\t`, `\n`, `\r`, vertical tab (0x0b) or form feed (0x0c).
///
/// The byte must not be converted to a `char` to ask `char::is_whitespace`:
/// that would also report 0x85 and 0xa0 (U+0085 and U+00A0) as whitespace,
/// and those values are valid continuation bytes of multi-byte UTF-8
/// sequences.
fn is_not_whitespace(c: &u8) -> bool {
    match *c {
        b' ' | b'\t' | b'\n' | b'\r' | 0x0b | 0x0c => false,
        _ => true,
    }
}

impl SliceExt for [u8] {
    fn trim_end(&self) -> &[u8] {
        match self.iter().rposition(is_not_whitespace) {
            Some(last) => &self[..=last],
            None => &[],
        }
    }
    fn trim_start(&self) -> &[u8] {
        match self.iter().position(is_not_whitespace) {
            Some(first) => &self[first..],
            None => &[],
        }
    }

    /// ```
    /// use hg::utils::SliceExt;
    /// assert_eq!(
    ///     b"  to trim  ".trim(),
    ///     b"to trim"
    /// );
    /// assert_eq!(
    ///     b"to trim  ".trim(),
    ///     b"to trim"
    /// );
    /// assert_eq!(
    ///     b"  to trim".trim(),
    ///     b"to trim"
    /// );
    /// ```
    fn trim(&self) -> &[u8] {
        self.trim_start().trim_end()
    }
}
@@ -1,83 +1,94 b''
1 // files.rs
2 //
3 // Copyright 2019
4 // Raphaël Gomès <rgomes@octobus.net>,
5 // Yuya Nishihara <yuya@tcha.org>
6 //
7 // This software may be used and distributed according to the terms of the
8 // GNU General Public License version 2 or any later version.
9
10 //! Functions for fiddling with files.
11
1 12 use std::iter::FusedIterator;
2 13 use std::path::Path;
3 14
/// Returns a `Path` view of the given bytes, without copying them.
///
/// On Unix the bytes are used as is.
///
/// # Panics
///
/// On Windows, panics if `bytes` is not valid UTF-8.
pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
    let os_str;
    #[cfg(unix)]
    {
        use std::os::unix::ffi::OsStrExt;
        os_str = std::ffi::OsStr::from_bytes(bytes);
    }
    #[cfg(windows)]
    {
        // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
        // Perhaps, the return type would have to be Result<PathBuf>.
        //
        // `OsStringExt::from_wide` cannot be used here: it takes UTF-16
        // code units and returns an owned `OsString`, which cannot be
        // returned as a borrowed `&Path`. Reading the bytes as UTF-8 is the
        // only conversion that borrows.
        os_str = std::ffi::OsStr::new(
            std::str::from_utf8(bytes)
                .expect("path bytes must be valid UTF-8 on Windows"),
        );
    }

    Path::new(os_str)
}
21 32
/// An iterator over repository path yielding itself and its ancestors.
///
/// Each step cuts the path at its last `/`; the empty path is the last item
/// yielded.
#[derive(Copy, Clone, Debug)]
pub struct Ancestors<'a> {
    /// The item the next call to `next()` will yield, if any
    next: Option<&'a [u8]>,
}

impl<'a> Iterator for Ancestors<'a> {
    // if we had an HgPath type, this would yield &'a HgPath
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        // Leaves `None` behind, which is what must follow the empty path
        let current = self.next.take()?;
        if !current.is_empty() {
            let cut = current.iter().rposition(|&c| c == b'/').unwrap_or(0);
            self.next = Some(&current[..cut]);
        }
        Some(current)
    }
}

impl<'a> FusedIterator for Ancestors<'a> {}
47 58
48 59 /// Returns an iterator yielding ancestor directories of the given repository
49 60 /// path.
50 61 ///
51 62 /// The path is separated by '/', and must not start with '/'.
52 63 ///
53 64 /// The path itself isn't included unless it is b"" (meaning the root
54 65 /// directory.)
55 66 pub fn find_dirs<'a>(path: &'a [u8]) -> Ancestors<'a> {
56 67 let mut dirs = Ancestors { next: Some(path) };
57 68 if !path.is_empty() {
58 69 dirs.next(); // skip itself
59 70 }
60 71 dirs
61 72 }
62 73
#[cfg(test)]
mod tests {
    use super::find_dirs;

    #[test]
    fn find_dirs_some() {
        let mut dirs = find_dirs(b"foo/bar/baz");
        let expected: [&[u8]; 3] = [b"foo/bar", b"foo", b""];
        for dir in expected.iter() {
            assert_eq!(dirs.next(), Some(*dir));
        }
        // Exhausted, and stays that way
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }

    #[test]
    fn find_dirs_empty() {
        // looks weird, but mercurial.util.finddirs(b"") yields b""
        let mut dirs = find_dirs(b"");
        assert_eq!(dirs.next(), Some(b"".as_ref()));
        assert_eq!(dirs.next(), None);
        assert_eq!(dirs.next(), None);
    }
}
General Comments 0
You need to be logged in to leave comments. Login now