##// END OF EJS Templates
rust-nodemap: NodeMap trait with simplest implementation...
rust-nodemap: NodeMap trait with simplest implementation We're defining here only a small part of the immutable methods it will have at the end. This is so we can focus in the following changesets on the needed abstractions for a mutable append-only serializable version. The first implementor exposes the actual lookup algorithm in its simplest form. It will have to be expanded to account for the missing methods, and the special cases related to NULL_NODE. Differential Revision: https://phab.mercurial-scm.org/D7791

File last commit:

r44278:5ac243a9 default
r44644:e52401a9 default
Show More
filepatterns.rs
380 lines | 11.6 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-docstrings: add missing module docstrings...
r42996 // filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Handling of Mercurial-specific patterns.
Raphaël Gomès
rust-performance: introduce FastHashMap type alias for HashMap...
r44278 use crate::{
utils::SliceExt, FastHashMap, LineNumber, PatternError, PatternFileError,
};
Raphaël Gomès
rust: switch hg-core and hg-cpython to rust 2018 edition...
r42828 use lazy_static::lazy_static;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 use regex::bytes::{NoExpand, Regex};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::fs::File;
use std::io::Read;
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::vec::Vec;
lazy_static! {
    /// Table indexed by byte value giving the regex-escaped form of that
    /// byte: the bytes listed in `to_escape` are prefixed with a backslash,
    /// every other byte maps to itself.
    static ref RE_ESCAPE: Vec<Vec<u8>> = {
        // Start from the identity mapping for all 256 byte values.
        let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
        let to_escape = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
        for byte in to_escape {
            v[*byte as usize].insert(0, b'\\');
        }
        v
    };
}
/// Replacements tried, in this order, against the input that follows a `*`
/// in a glob. Each entry is `(bytes to consume, regex bytes to emit)`.
/// The last entry consumes nothing, so it always matches.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    (b"*/", b"(?:.*/)?"),
    (b"*", b".*"),
    (b"", b"[^/]*"),
];
/// The pattern syntaxes handled by this module. The keyword noted on each
/// variant is the one `parse_pattern_syntax` maps to it.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// Regular expression, used as-is (keyword `re`)
    Regexp,
    /// Glob that matches at the front of the path (keyword `rootglob`)
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes) (keyword `glob`)
    Glob,
    /// Literal path, matched up to a `/` or the end (keyword `path`)
    Path,
    /// Literal path, turned into the same regex as `Path`
    /// (keyword `relpath`)
    RelPath,
    /// Glob that may start at the root or after any `/`
    /// (keyword `relglob`)
    RelGlob,
    /// Regular expression that gets `.*` prepended unless it starts with
    /// `^` (keyword `relre`)
    RelRegexp,
    /// Directory name; matches the files directly inside it
    /// (keyword `rootfilesin`)
    RootFiles,
}
/// Transforms a glob pattern into a regex
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
let mut input = pat;
let mut res: Vec<u8> = vec![];
let mut group_depth = 0;
while let Some((c, rest)) = input.split_first() {
input = rest;
match c {
b'*' => {
for (source, repl) in GLOB_REPLACEMENTS {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = input.drop_prefix(source) {
input = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 res.extend(*repl);
break;
}
}
}
b'?' => res.extend(b"."),
b'[' => {
match input.iter().skip(1).position(|b| *b == b']') {
None => res.extend(b"\\["),
Some(end) => {
// Account for the one we skipped
let end = end + 1;
res.extend(b"[");
for (i, b) in input[..end].iter().enumerate() {
if *b == b'!' && i == 0 {
res.extend(b"^")
} else if *b == b'^' && i == 0 {
res.extend(b"\\^")
} else if *b == b'\\' {
res.extend(b"\\\\")
} else {
res.push(*b)
}
}
res.extend(b"]");
input = &input[end + 1..];
}
}
}
b'{' => {
group_depth += 1;
res.extend(b"(?:")
}
b'}' if group_depth > 0 => {
group_depth -= 1;
res.extend(b")");
}
b',' if group_depth > 0 => res.extend(b"|"),
b'\\' => {
let c = {
if let Some((c, rest)) = input.split_first() {
input = rest;
c
} else {
c
}
};
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 res.extend(&RE_ESCAPE[*c as usize])
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 _ => res.extend(&RE_ESCAPE[*c as usize]),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
res
}
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
pattern
.iter()
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 .collect()
}
/// Maps a syntax keyword (without its trailing `:`) to the corresponding
/// `PatternSyntax`.
///
/// # Errors
///
/// Returns `PatternError::UnsupportedSyntax` carrying the (lossily
/// UTF-8 decoded) keyword when it is not one of the known keywords.
fn parse_pattern_syntax(kind: &[u8]) -> Result<PatternSyntax, PatternError> {
    const KEYWORDS: [(&[u8], PatternSyntax); 8] = [
        (b"re", PatternSyntax::Regexp),
        (b"path", PatternSyntax::Path),
        (b"relpath", PatternSyntax::RelPath),
        (b"rootfilesin", PatternSyntax::RootFiles),
        (b"relglob", PatternSyntax::RelGlob),
        (b"relre", PatternSyntax::RelRegexp),
        (b"glob", PatternSyntax::Glob),
        (b"rootglob", PatternSyntax::RootGlob),
    ];

    KEYWORDS
        .iter()
        .find(|(keyword, _)| *keyword == kind)
        .map(|&(_, syntax)| syntax)
        .ok_or_else(|| {
            PatternError::UnsupportedSyntax(
                String::from_utf8_lossy(kind).to_string(),
            )
        })
}
/// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
fn _build_single_regex(
syntax: PatternSyntax,
pattern: &[u8],
globsuffix: &[u8],
) -> Vec<u8> {
if pattern.is_empty() {
return vec![];
}
match syntax {
PatternSyntax::Regexp => pattern.to_owned(),
PatternSyntax::RelRegexp => {
if pattern[0] == b'^' {
return pattern.to_owned();
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [b".*", pattern].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::Path | PatternSyntax::RelPath => {
if pattern == b"." {
return vec![];
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::RootFiles => {
let mut res = if pattern == b"." {
vec![]
} else {
// Pattern is a directory name.
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"/"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 };
// Anything after the pattern must be a non-directory.
res.extend(b"[^/]+$");
res
}
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 PatternSyntax::RelGlob => {
let glob_re = glob_to_re(pattern);
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [b".*", rest, globsuffix].concat()
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 } else {
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [b"(?:|.*/)", glob_re.as_slice(), globsuffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 }
PatternSyntax::Glob | PatternSyntax::RootGlob => {
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [glob_to_re(pattern).as_slice(), globsuffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
}
/// Bytes whose presence in a pattern prevents `build_single_regex` from
/// taking its exact-match shortcut: `*`, `?`, `[`, `]`, `{`, `}` and `\`.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
/// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
pub fn build_single_regex(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 kind: &[u8],
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 pat: &[u8],
globsuffix: &[u8],
) -> Result<Vec<u8>, PatternError> {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let enum_kind = parse_pattern_syntax(kind)?;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 if enum_kind == PatternSyntax::RootGlob
Raphaël Gomès
rust-regex: fix shortcut for exact matches...
r42631 && !pat.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 {
Raphaël Gomès
rust-regex: fix shortcut for exact matches...
r42631 let mut escaped = escape_pattern(pat);
escaped.extend(b"(?:/|$)");
Ok(escaped)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
Ok(_build_single_regex(enum_kind, pat, globsuffix))
}
}
lazy_static! {
    /// Maps the syntax names accepted in pattern files to the prefix that
    /// `parse_pattern_file_contents` puts in front of the patterns using
    /// that syntax.
    static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
        let mut m = FastHashMap::default();

        m.insert(b"re".as_ref(), b"relre:".as_ref());
        m.insert(b"regexp".as_ref(), b"relre:".as_ref());
        m.insert(b"glob".as_ref(), b"relglob:".as_ref());
        m.insert(b"rootglob".as_ref(), b"rootglob:".as_ref());
        // These two map to themselves, without a trailing colon.
        m.insert(b"include".as_ref(), b"include".as_ref());
        m.insert(b"subinclude".as_ref(), b"subinclude".as_ref());
        m
    };
}
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 pub type PatternTuple = (Vec<u8>, LineNumber, Vec<u8>);
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
/// One warning emitted by `parse_pattern_file_contents`:
/// `(path of the pattern file, unrecognized syntax name)`.
type WarningTuple = (PathBuf, Vec<u8>);
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 lines: &[u8],
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 file_path: P,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
) -> (Vec<PatternTuple>, Vec<WarningTuple>) {
let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let comment_escape_regex = Regex::new(r"\\#").unwrap();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut inputs: Vec<PatternTuple> = vec![];
let mut warnings: Vec<WarningTuple> = vec![];
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut current_syntax = b"relre:".as_ref();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let line_number = line_number + 1;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let line_buf;
Yuya Nishihara
rust-filepatterns: use literal b'#' instead of cast
r42858 if line.contains(&b'#') {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 if let Some(cap) = comment_regex.captures(line) {
line = &line[..cap.get(1).unwrap().end()]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
line = &line_buf;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
let mut line = line.trim_end();
if line.is_empty() {
continue;
}
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(syntax) = line.drop_prefix(b"syntax:") {
let syntax = syntax.trim();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
if let Some(rel_syntax) = SYNTAXES.get(syntax) {
current_syntax = rel_syntax;
} else if warn {
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 warnings
.push((file_path.as_ref().to_owned(), syntax.to_owned()));
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
continue;
}
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut line_syntax: &[u8] = &current_syntax;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
for (s, rels) in SYNTAXES.iter() {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = line.drop_prefix(rels) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 }
if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
}
}
inputs.push((
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 [line_syntax, line].concat(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_number,
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 line.to_owned(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 ));
}
(inputs, warnings)
}
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 pub fn read_pattern_file<P: AsRef<Path>>(
file_path: P,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
) -> Result<(Vec<PatternTuple>, Vec<WarningTuple>), PatternFileError> {
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 let mut f = File::open(file_path.as_ref())?;
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut contents = Vec::new();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 f.read_to_end(&mut contents)?;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 Ok(parse_pattern_file_contents(&contents, file_path, warn))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes outside of the escape list are left alone, the others get a
    /// leading backslash.
    #[test]
    fn escape_pattern_test() {
        let untouched = br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    /// One assertion per glob construct handled by `glob_to_re`.
    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br#"?"#), br#"."#);
        assert_eq!(glob_to_re(br#"*"#), br#"[^/]*"#);
        assert_eq!(glob_to_re(br#"**"#), br#".*"#);
        assert_eq!(glob_to_re(br#"**/a"#), br#"(?:.*/)?a"#);
        assert_eq!(glob_to_re(br#"a/**/b"#), br#"a/(?:.*/)?b"#);
        assert_eq!(glob_to_re(br#"[a*?!^][^b][!c]"#), br#"[a*?!^][\^b][^c]"#);
        assert_eq!(glob_to_re(br#"{a,b}"#), br#"(?:a|b)"#);
        assert_eq!(glob_to_re(br#".\*\?"#), br#"\.\*\?"#);
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        // A `syntax:` line applies to the patterns that follow it.
        let lines = b"syntax: glob\n*.elc";

        assert_eq!(
            vec![(b"relglob:*.elc".to_vec(), 2, b"*.elc".to_vec())],
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .0,
        );

        // `syntax:` lines alone produce no pattern.
        let lines = b"syntax: include\nsyntax: glob";

        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .0,
            vec![]
        );

        // An inline `glob:` prefix is rewritten to `relglob:`.
        let lines = b"glob:**.o";
        assert_eq!(
            parse_pattern_file_contents(lines, Path::new("file_path"), false)
                .0,
            vec![(b"relglob:**.o".to_vec(), 1, b"**.o".to_vec())]
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(
            br"(?:/|$)".to_vec(),
            build_single_regex(b"rootglob", b"", b"").unwrap()
        );
        assert_eq!(
            br"whatever(?:/|$)".to_vec(),
            build_single_regex(b"rootglob", b"whatever", b"").unwrap()
        );
        // A glob metacharacter disables the shortcut.
        assert_eq!(
            br"[^/]*\.o".to_vec(),
            build_single_regex(b"rootglob", b"*.o", b"").unwrap()
        );
    }
}