##// END OF EJS Templates
copies: stop attempt to avoid extra dict copies around branching...
copies: stop attempt to avoid extra dict copies around branching In the Python code, we attempt to avoid unnecessary dict copies when gathering copy information. However, that logic is wobbly and I keep running into cases where independent branches affect each other. With the current code we can't ensure we are the only "user" of a dict when dealing with merges. This caused havoc in the next series of tests I am about to introduce. So for now I am disabling the faulty optimisation. I believe we will need a dedicated overlay to deal with the "copy on write" logic to have something correct. I am also hoping to find time to build dedicated test cases for this category of problem instead of relying on side effects in other tests. However, for now I am focusing on another issue. Differential Revision: https://phab.mercurial-scm.org/D9608

File last commit:

r45500:26114bd6 default
r46800:cb8b2ee8 default
Show More
filepatterns.rs
670 lines | 20.3 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-docstrings: add missing module docstrings...
r42996 // filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Handling of Mercurial-specific patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use crate::{
utils::{
files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
SliceExt,
},
FastHashMap, PatternError,
};
Raphaël Gomès
rust: switch hg-core and hg-cpython to rust 2018 edition...
r42828 use lazy_static::lazy_static;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 use regex::bytes::{NoExpand, Regex};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::fs::File;
use std::io::Read;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use std::ops::Deref;
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::vec::Vec;
lazy_static! {
    /// Table indexed by byte value. Each entry holds the byte itself,
    /// preceded by a backslash when the byte is one of the characters
    /// listed in `special`.
    static ref RE_ESCAPE: Vec<Vec<u8>> = {
        let special = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
        (0..=255u8)
            .map(|byte| {
                if special.contains(&byte) {
                    vec![b'\\', byte]
                } else {
                    vec![byte]
                }
            })
            .collect()
    };
}
/// Replacement table used by `glob_to_re` once it has consumed a `*`: each
/// entry pairs the bytes that may follow that `*` with the regex to emit.
/// These are matched in order
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
&[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// Appended to the regexp of globs
/// As a regex, this matches either a `/` or the end of the input (`$`).
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
/// The kinds of pattern understood by this module.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
    /// A regular expression
    Regexp,
    /// Glob that matches at the front of the path
    RootGlob,
    /// Glob that matches at any suffix of the path (still anchored at
    /// slashes)
    Glob,
    /// A path relative to repository root, which is matched recursively
    Path,
    /// A path relative to cwd
    RelPath,
    /// An unrooted glob (*.rs matches Rust files in all dirs)
    RelGlob,
    /// A regexp that needn't match the start of a name
    RelRegexp,
    /// A path relative to repository root, which is matched non-recursively
    /// (will not match subdirectories)
    RootFiles,
    /// A file of patterns to read and include
    Include,
    /// A file of patterns to match against files under the same directory
    SubInclude,
}
/// Transforms a glob pattern into a regex
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
let mut input = pat;
let mut res: Vec<u8> = vec![];
let mut group_depth = 0;
while let Some((c, rest)) = input.split_first() {
input = rest;
match c {
b'*' => {
for (source, repl) in GLOB_REPLACEMENTS {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = input.drop_prefix(source) {
input = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 res.extend(*repl);
break;
}
}
}
b'?' => res.extend(b"."),
b'[' => {
match input.iter().skip(1).position(|b| *b == b']') {
None => res.extend(b"\\["),
Some(end) => {
// Account for the one we skipped
let end = end + 1;
res.extend(b"[");
for (i, b) in input[..end].iter().enumerate() {
if *b == b'!' && i == 0 {
res.extend(b"^")
} else if *b == b'^' && i == 0 {
res.extend(b"\\^")
} else if *b == b'\\' {
res.extend(b"\\\\")
} else {
res.push(*b)
}
}
res.extend(b"]");
input = &input[end + 1..];
}
}
}
b'{' => {
group_depth += 1;
res.extend(b"(?:")
}
b'}' if group_depth > 0 => {
group_depth -= 1;
res.extend(b")");
}
b',' if group_depth > 0 => res.extend(b"|"),
b'\\' => {
let c = {
if let Some((c, rest)) = input.split_first() {
input = rest;
c
} else {
c
}
};
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 res.extend(&RE_ESCAPE[*c as usize])
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 _ => res.extend(&RE_ESCAPE[*c as usize]),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
res
}
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
pattern
.iter()
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 .collect()
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 pub fn parse_pattern_syntax(
kind: &[u8],
) -> Result<PatternSyntax, PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 match kind {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 b"re:" => Ok(PatternSyntax::Regexp),
b"path:" => Ok(PatternSyntax::Path),
b"relpath:" => Ok(PatternSyntax::RelPath),
b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
b"relglob:" => Ok(PatternSyntax::RelGlob),
b"relre:" => Ok(PatternSyntax::RelRegexp),
b"glob:" => Ok(PatternSyntax::Glob),
b"rootglob:" => Ok(PatternSyntax::RootGlob),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 b"include:" => Ok(PatternSyntax::Include),
b"subinclude:" => Ok(PatternSyntax::SubInclude),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 _ => Err(PatternError::UnsupportedSyntax(
String::from_utf8_lossy(kind).to_string(),
)),
}
}
/// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
let IgnorePattern {
syntax, pattern, ..
} = entry;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 if pattern.is_empty() {
return vec![];
}
match syntax {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 PatternSyntax::Regexp => pattern.to_owned(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 PatternSyntax::RelRegexp => {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 // The `regex` crate accepts `**` while `re2` and Python's `re`
// do not. Checking for `*` correctly triggers the same error all
// engines.
Raphaël Gomès
rust-regex: prevent nonsensical `.*.*` pattern from happening...
r45348 if pattern[0] == b'^'
|| pattern[0] == b'*'
|| pattern.starts_with(b".*")
{
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 return pattern.to_owned();
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 [&b".*"[..], pattern].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::Path | PatternSyntax::RelPath => {
if pattern == b"." {
return vec![];
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::RootFiles => {
let mut res = if pattern == b"." {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 vec![]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
// Pattern is a directory name.
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 [escape_pattern(pattern).as_slice(), b"/"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 };
// Anything after the pattern must be a non-directory.
res.extend(b"[^/]+$");
res
}
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 PatternSyntax::RelGlob => {
let glob_re = glob_to_re(pattern);
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 [b".*", rest, GLOB_SUFFIX].concat()
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 } else {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 }
PatternSyntax::Glob | PatternSyntax::RootGlob => {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
/// Bytes whose presence in a `rootglob` pattern prevents `build_single_regex`
/// from taking its exact-match shortcut (returning `Ok(None)`).
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
[b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// Normalizes a `/`-separated path given as bytes.
///
/// Empty and `.` components are removed and `..` components cancel the
/// component before them when there is one. One or two leading slashes are
/// kept as they are; three or more collapse into a single one. An empty
/// input or an empty result gives `b"."`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';

    if bytes.is_empty() {
        return b".".to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let slash_run = bytes.iter().take_while(|b| **b == SEP).count();
    let leading = if slash_run > 2 { 1 } else { slash_run };

    let mut kept: Vec<&[u8]> = Vec::new();
    for part in bytes.split(|b| *b == SEP) {
        if part.is_empty() || part == b"." {
            continue;
        }
        if part != b".." {
            kept.push(part);
            continue;
        }
        let previous_is_parent = kept.last().map(|prev| *prev == b"..");
        match previous_is_parent {
            // Nothing to cancel: `..` is only kept in a relative path
            None => {
                if leading == 0 {
                    kept.push(part);
                }
            }
            // `..` cannot cancel another `..`
            Some(true) => kept.push(part),
            Some(false) => {
                kept.pop();
            }
        }
    }

    let mut normalized = vec![SEP; leading];
    for (idx, part) in kept.iter().enumerate() {
        if idx > 0 {
            normalized.push(SEP);
        }
        normalized.extend_from_slice(part);
    }
    if normalized.is_empty() {
        normalized.push(b'.');
    }
    normalized
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
pub fn build_single_regex(
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 entry: &IgnorePattern,
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 ) -> Result<Option<Vec<u8>>, PatternError> {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let IgnorePattern {
pattern, syntax, ..
} = entry;
let pattern = match syntax {
PatternSyntax::RootGlob
| PatternSyntax::Path
| PatternSyntax::RelGlob
| PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include | PatternSyntax::SubInclude => {
return Err(PatternError::NonRegexPattern(entry.clone()))
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 _ => pattern.to_owned(),
};
if *syntax == PatternSyntax::RootGlob
&& !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 Ok(None)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut entry = entry.clone();
entry.pattern = pattern;
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 Ok(Some(_build_single_regex(&entry)))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
lazy_static! {
    /// Maps the syntax names accepted by pattern files to the syntax prefix
    /// (trailing `:` included) used for the lines they apply to.
    static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
        const ENTRIES: [(&[u8], &[u8]); 6] = [
            (b"re", b"relre:"),
            (b"regexp", b"relre:"),
            (b"glob", b"relglob:"),
            (b"rootglob", b"rootglob:"),
            (b"include", b"include:"),
            (b"subinclude", b"subinclude:"),
        ];

        let mut table = FastHashMap::default();
        for (name, prefix) in ENTRIES.iter() {
            table.insert(*name, *prefix);
        }
        table
    };
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// Non-fatal problems reported while reading or parsing a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path
    NoSuchFile(PathBuf),
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 pub fn parse_pattern_file_contents<P: AsRef<Path>>(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 lines: &[u8],
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 file_path: P,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
Raphaël Gomès
rust: do a clippy pass...
r45500
#[allow(clippy::trivial_regex)]
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let comment_escape_regex = Regex::new(r"\\#").unwrap();
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut inputs: Vec<IgnorePattern> = vec![];
let mut warnings: Vec<PatternFileWarning> = vec![];
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut current_syntax = b"relre:".as_ref();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let line_number = line_number + 1;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let line_buf;
Yuya Nishihara
rust-filepatterns: use literal b'#' instead of cast
r42858 if line.contains(&b'#') {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 if let Some(cap) = comment_regex.captures(line) {
line = &line[..cap.get(1).unwrap().end()]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
line = &line_buf;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
let mut line = line.trim_end();
if line.is_empty() {
continue;
}
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(syntax) = line.drop_prefix(b"syntax:") {
let syntax = syntax.trim();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
if let Some(rel_syntax) = SYNTAXES.get(syntax) {
current_syntax = rel_syntax;
} else if warn {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 warnings.push(PatternFileWarning::InvalidSyntax(
file_path.as_ref().to_owned(),
syntax.to_owned(),
));
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
continue;
}
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut line_syntax: &[u8] = &current_syntax;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
for (s, rels) in SYNTAXES.iter() {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = line.drop_prefix(rels) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 }
if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
}
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 inputs.push(IgnorePattern::new(
parse_pattern_syntax(&line_syntax).map_err(|e| match e {
PatternError::UnsupportedSyntax(syntax) => {
PatternError::UnsupportedSyntaxInFile(
syntax,
file_path.as_ref().to_string_lossy().into(),
line_number,
)
}
_ => e,
})?,
&line,
&file_path,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 ));
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Ok((inputs, warnings))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 pub fn read_pattern_file<P: AsRef<Path>>(
file_path: P,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
let mut f = match File::open(file_path.as_ref()) {
Ok(f) => Ok(f),
Err(e) => match e.kind() {
std::io::ErrorKind::NotFound => {
return Ok((
vec![],
vec![PatternFileWarning::NoSuchFile(
file_path.as_ref().to_owned(),
)],
))
}
_ => Err(e),
},
}?;
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut contents = Vec::new();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 f.read_to_end(&mut contents)?;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Ok(parse_pattern_file_contents(&contents, file_path, warn)?)
}
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
pub syntax: PatternSyntax,
pub pattern: Vec<u8>,
pub source: PathBuf,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 impl IgnorePattern {
pub fn new(
syntax: PatternSyntax,
pattern: &[u8],
source: impl AsRef<Path>,
) -> Self {
Self {
syntax,
pattern: pattern.to_owned(),
source: source.as_ref().to_owned(),
}
}
}
/// Shorthand for a `Result` whose error type is `PatternError`.
pub type PatternResult<T> = Result<T, PatternError>;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
/// patterns.
///
/// `subinclude:` is not treated as a special pattern here: unraveling them
/// needs to occur in the "ignore" phase.
pub fn get_patterns_from_file(
pattern_file: impl AsRef<Path>,
root_dir: impl AsRef<Path>,
) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
let (patterns, mut warnings) = read_pattern_file(&pattern_file, true)?;
let patterns = patterns
.into_iter()
.flat_map(|entry| -> PatternResult<_> {
let IgnorePattern {
Raphaël Gomès
rust: do a clippy pass...
r45500 syntax, pattern, ..
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 } = &entry;
Ok(match syntax {
PatternSyntax::Include => {
let inner_include =
root_dir.as_ref().join(get_path_from_bytes(&pattern));
let (inner_pats, inner_warnings) = get_patterns_from_file(
&inner_include,
root_dir.as_ref(),
)?;
warnings.extend(inner_warnings);
inner_pats
}
_ => vec![entry],
})
})
.flatten()
.collect();
Ok((patterns, warnings))
}
/// Holds all the information needed to handle a `subinclude:` pattern.
pub struct SubInclude {
/// Will be used for repository (hg) paths that start with this prefix.
/// It is relative to the current working directory, so comparing against
/// repository paths is painless.
/// `SubInclude::new` appends a trailing `/` to it unless it is empty.
pub prefix: HgPathBuf,
/// The file itself, containing the patterns
pub path: PathBuf,
/// Folder in the filesystem where this applies (the parent of `path`
/// when it has one)
pub root: PathBuf,
}
impl SubInclude {
pub fn new(
root_dir: impl AsRef<Path>,
pattern: &[u8],
source: impl AsRef<Path>,
) -> Result<SubInclude, HgPathError> {
let normalized_source =
normalize_path_bytes(&get_bytes_from_path(source));
let source_root = get_path_from_bytes(&normalized_source);
Raphaël Gomès
rust: do a clippy pass...
r45500 let source_root =
source_root.parent().unwrap_or_else(|| source_root.deref());
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
let path = source_root.join(get_path_from_bytes(pattern));
Raphaël Gomès
rust: do a clippy pass...
r45500 let new_root = path.parent().unwrap_or_else(|| path.deref());
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
let prefix = canonical_path(&root_dir, &root_dir, new_root)?;
Ok(Self {
prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
if !p.is_empty() {
p.push(b'/');
}
Ok(p)
})?,
path: path.to_owned(),
root: new_root.to_owned(),
})
}
}
/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
///
/// Every `SubInclude` pattern is turned into a `SubInclude` value; all the
/// other patterns are returned by reference, in their original order.
///
/// # Errors
///
/// Returns the first `HgPathError` raised by `SubInclude::new`.
pub fn filter_subincludes(
    ignore_patterns: &[IgnorePattern],
    root_dir: impl AsRef<Path>,
) -> Result<(Vec<SubInclude>, Vec<&IgnorePattern>), HgPathError> {
    let root = root_dir.as_ref();
    let mut sub_includes = Vec::new();
    let mut regular = Vec::new();

    for entry in ignore_patterns {
        match entry.syntax {
            PatternSyntax::SubInclude => sub_includes.push(
                SubInclude::new(root, &entry.pattern, &entry.source)?,
            ),
            _ => regular.push(entry),
        }
    }
    Ok((sub_includes, regular))
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `contents` as the pattern file `file_path`, without warnings,
    /// and returns the patterns only.
    fn patterns_of(contents: &[u8]) -> Vec<IgnorePattern> {
        let (patterns, _warnings) = parse_pattern_file_contents(
            contents,
            Path::new("file_path"),
            false,
        )
        .unwrap();
        patterns
    }

    /// Runs `build_single_regex` on a pattern without any source file.
    fn regex_of(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
        let entry = IgnorePattern::new(syntax, pattern, Path::new(""));
        build_single_regex(&entry).unwrap()
    }

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#),
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#
                .to_vec()
        );
    }

    #[test]
    fn glob_test() {
        // (glob, expected regex)
        let cases: &[(&[u8], &[u8])] = &[
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];
        for (glob, expected) in cases {
            assert_eq!(glob_to_re(glob), expected.to_vec());
        }
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        assert_eq!(
            patterns_of(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        assert_eq!(
            patterns_of(b"syntax: include\nsyntax: glob"),
            Vec::<IgnorePattern>::new()
        );

        assert_eq!(
            patterns_of(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            regex_of(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            regex_of(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(regex_of(PatternSyntax::RootGlob, b""), None);
        assert_eq!(regex_of(PatternSyntax::RootGlob, b"whatever"), None);
        assert_eq!(
            regex_of(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}