##// END OF EJS Templates
rust-utils: add util for canonical path...
rust-utils: add util for canonical path Differential Revision: https://phab.mercurial-scm.org/D7871

File last commit:

r44278:5ac243a9 default
r44783:4caac36c default
Show More
filepatterns.rs
380 lines | 11.6 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-docstrings: add missing module docstrings...
r42996 // filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Handling of Mercurial-specific patterns.
Raphaël Gomès
rust-performance: introduce FastHashMap type alias for HashMap...
r44278 use crate::{
utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
};
Raphaël Gomès
rust: switch hg-core and hg-cpython to rust 2018 edition...
r42828 use lazy_static::lazy_static;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 use regex::bytes::{NoExpand, Regex};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::fs::File;
use std::io::Read;
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::vec::Vec;
lazy_static! {
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 static ref RE_ESCAPE: Vec<Vec<u8>> = {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
for byte in to_escape {
v[*byte as usize].insert(0, b'\\');
}
v
};
}
/// These are matched in order
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
&[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
Regexp,
/// Glob that matches at the front of the path
RootGlob,
Yuya Nishihara
rust: apply more formatting fixes...
r43109 /// Glob that matches at any suffix of the path (still anchored at
/// slashes)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Glob,
Path,
RelPath,
RelGlob,
RelRegexp,
RootFiles,
}
/// Transforms a glob pattern into a regex
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
let mut input = pat;
let mut res: Vec<u8> = vec![];
let mut group_depth = 0;
while let Some((c, rest)) = input.split_first() {
input = rest;
match c {
b'*' => {
for (source, repl) in GLOB_REPLACEMENTS {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = input.drop_prefix(source) {
input = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 res.extend(*repl);
break;
}
}
}
b'?' => res.extend(b"."),
b'[' => {
match input.iter().skip(1).position(|b| *b == b']') {
None => res.extend(b"\\["),
Some(end) => {
// Account for the one we skipped
let end = end + 1;
res.extend(b"[");
for (i, b) in input[..end].iter().enumerate() {
if *b == b'!' && i == 0 {
res.extend(b"^")
} else if *b == b'^' && i == 0 {
res.extend(b"\\^")
} else if *b == b'\\' {
res.extend(b"\\\\")
} else {
res.push(*b)
}
}
res.extend(b"]");
input = &input[end + 1..];
}
}
}
b'{' => {
group_depth += 1;
res.extend(b"(?:")
}
b'}' if group_depth > 0 => {
group_depth -= 1;
res.extend(b")");
}
b',' if group_depth > 0 => res.extend(b"|"),
b'\\' => {
let c = {
if let Some((c, rest)) = input.split_first() {
input = rest;
c
} else {
c
}
};
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 res.extend(&RE_ESCAPE[*c as usize])
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 _ => res.extend(&RE_ESCAPE[*c as usize]),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
res
}
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
pattern
.iter()
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 .collect()
}
fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
match kind {
b"re" => Ok(PatternSyntax::Regexp),
b"path" => Ok(PatternSyntax::Path),
b"relpath" => Ok(PatternSyntax::RelPath),
b"rootfilesin" => Ok(PatternSyntax::RootFiles),
b"relglob" => Ok(PatternSyntax::RelGlob),
b"relre" => Ok(PatternSyntax::RelRegexp),
b"glob" => Ok(PatternSyntax::Glob),
b"rootglob" => Ok(PatternSyntax::RootGlob),
_ => Err(PatternError::UnsupportedSyntax(
String::from_utf8_lossy(kind).to_string(),
)),
}
}
/// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
fn _build_single_regex(
syntax: PatternSyntax,
pattern: &[u8],
globsuffix: &[u8],
) -> Vec<u8> {
if pattern.is_empty() {
return vec![];
}
match syntax {
PatternSyntax::Regexp => pattern.to_owned(),
PatternSyntax::RelRegexp => {
if pattern[0] == b'^' {
return pattern.to_owned();
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [b".*", pattern].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::Path | PatternSyntax::RelPath => {
if pattern == b"." {
return vec![];
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::RootFiles => {
let mut res = if pattern == b"." {
vec![]
} else {
// Pattern is a directory name.
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"/"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 };
// Anything after the pattern must be a non-directory.
res.extend(b"[^/]+$");
res
}
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 PatternSyntax::RelGlob => {
let glob_re = glob_to_re(pattern);
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [b".*", rest, globsuffix].concat()
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 } else {
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [b"(?:|.*/)", glob_re.as_slice(), globsuffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 }
PatternSyntax::Glob | PatternSyntax::RootGlob => {
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [glob_to_re(pattern).as_slice(), globsuffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
}
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
[b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
/// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
pub fn build_single_regex(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 kind: &[u8],
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 pat: &[u8],
globsuffix: &[u8],
) -> Result<Vec<u8>, PatternError> {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let enum_kind = parse_pattern_syntax(kind)?;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 if enum_kind == PatternSyntax::RootGlob
Raphaël Gomès
rust-regex: fix shortcut for exact matches...
r42631 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 {
Raphaël Gomès
rust-regex: fix shortcut for exact matches...
r42631 let mut escaped = escape_pattern(pat);
escaped.extend(b"(?:/|$)");
Ok(escaped)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
Ok(_build_single_regex(enum_kind, pat, globsuffix))
}
}
lazy_static! {
Raphaël Gomès
rust-performance: introduce FastHashMap type alias for HashMap...
r44278 static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
let mut m = FastHashMap::default();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 m.insert(b"re".as_ref(), b"relre:".as_ref());
m.insert(b"regexp".as_ref(), b"relre:".as_ref());
m.insert(b"glob".as_ref(), b"relglob:".as_ref());
m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
m.insert(b"include".as_ref(), b"include".as_ref());
m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 m
};
}
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 type WarningTuple = (PathBuf, Vec<u8>);
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 lines: &[u8],
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 file_path: P,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let comment_escape_regex = Regex::new(r"\\#").unwrap();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut inputs: Vec<PatternTuple> = vec![];
let mut warnings: Vec<WarningTuple> = vec![];
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut current_syntax = b"relre:".as_ref();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let line_number = line_number + 1;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let line_buf;
Yuya Nishihara
rust-filepatterns: use literal b'#' instead of cast
r42858 if line.contains(&b'#') {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 if let Some(cap) = comment_regex.captures(line) {
line = &line[..cap.get(1).unwrap().end()]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
line = &line_buf;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
let mut line = line.trim_end();
if line.is_empty() {
continue;
}
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(syntax) = line.drop_prefix(b"syntax:") {
let syntax = syntax.trim();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
if let Some(rel_syntax) = SYNTAXES.get(syntax) {
current_syntax = rel_syntax;
} else if warn {
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 warnings
.push((file_path.as_ref().to_owned(), syntax.to_owned()));
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
continue;
}
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut line_syntax: &[u8] = &current_syntax;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
for (s, rels) in SYNTAXES.iter() {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = line.drop_prefix(rels) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 }
if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
}
}
inputs.push((
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 [line_syntax, line].concat(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_number,
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 line.to_owned(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 ));
}
(inputs, warnings)
}
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 pub fn read_pattern_file<P: AsRef<Path>>(
file_path: P,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 let mut f = File::open(file_path.as_ref())?;
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut contents = Vec::new();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 f.read_to_end(&mut contents)?;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 Ok(parse_pattern_file_contents(&contents, file_path, warn))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn escape_pattern_test() {
let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
assert_eq!(escape_pattern(untouched), untouched.to_vec());
// All escape codes
assert_eq!(
escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
.to_vec()
);
}
#[test]
fn glob_test() {
assert_eq!(glob_to_re(br#"?"#), br#"."#);
assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
assert_eq!(glob_to_re(br#"**"#), br#".*"#);
assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
}
#[test]
fn test_parse_pattern_file_contents() {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let lines = b"syntax: glob\n*.elc";
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
assert_eq!(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 parse_pattern_file_contents(lines, Path::new("file_path"), false)
.0,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 );
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let lines = b"syntax: include\nsyntax: glob";
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
assert_eq!(
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 parse_pattern_file_contents(lines, Path::new("file_path"), false)
.0,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 vec![]
);
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let lines = b"glob:**.o";
assert_eq!(
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 parse_pattern_file_contents(lines, Path::new("file_path"), false)
.0,
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
);
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Raphaël Gomès
rust-regex: fix shortcut for exact matches...
r42631
#[test]
fn test_build_single_regex_shortcut() {
assert_eq!(
br"(?:/|$)".to_vec(),
build_single_regex(b"rootglob", b"", b"").unwrap()
);
assert_eq!(
br"whatever(?:/|$)".to_vec(),
build_single_regex(b"rootglob", b"whatever", b"").unwrap()
);
assert_eq!(
br"[^/]*\.o".to_vec(),
build_single_regex(b"rootglob", b"*.o", b"").unwrap()
);
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }