##// END OF EJS Templates
dirstate-v2: add test that shows a collision in ignore patterns hash...
dirstate-v2: add test that shows a collision in ignore patterns hash This hash is used for optimizing dirstate `status`. We demonstrate that the hash is incorrectly ignoring the changes to the semantics of the ignore files just because the contents (but not their source) haven't changed. This is fixed in the next changeset.

File last commit:

r50383:7c93e38a default
r50452:ca19335e stable
Show More
filepatterns.rs
706 lines | 21.8 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-docstrings: add missing module docstrings...
r42996 // filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Handling of Mercurial-specific patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use crate::{
utils::{
files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
SliceExt,
},
FastHashMap, PatternError,
};
Raphaël Gomès
rust: switch hg-core and hg-cpython to rust 2018 edition...
r42828 use lazy_static::lazy_static;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 use regex::bytes::{NoExpand, Regex};
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use std::ops::Deref;
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::vec::Vec;
lazy_static! {
    /// Lookup table indexed by byte value: entry `b` holds the bytes to emit
    /// when `b` must appear literally in a regex, i.e. either `[b]` or
    /// `[b'\\', b]` for bytes that are special to the regex syntax.
    static ref RE_ESCAPE: Vec<Vec<u8>> = {
        // Bytes that get a leading backslash.
        const TO_ESCAPE: &[u8] = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
        (0..=255u8)
            .map(|byte| {
                if TO_ESCAPE.contains(&byte) {
                    vec![b'\\', byte]
                } else {
                    vec![byte]
                }
            })
            .collect()
    };
}
/// Regex replacements used by `glob_to_re` once a `*` has been consumed.
///
/// Each entry is `(bytes following the `*`, regex to emit)`. These are
/// matched in order and the first entry whose prefix is found wins, so the
/// longer prefixes must come first; the last entry has an empty prefix and
/// presumably acts as the fallback for a lone `*`.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
&[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// Appended to the regexp of globs: the match must end either at a path
/// separator or at the end of the input.
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 #[derive(Debug, Clone, PartialEq, Eq)]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 pub enum PatternSyntax {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A regular expression
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Regexp,
/// Glob that matches at the front of the path
RootGlob,
Yuya Nishihara
rust: apply more formatting fixes...
r43109 /// Glob that matches at any suffix of the path (still anchored at
/// slashes)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Glob,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// a path relative to repository root, which is matched recursively
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Path,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A path relative to cwd
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelPath,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// an unrooted glob (*.rs matches Rust files in all dirs)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelGlob,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A regexp that needn't match the start of a name
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelRegexp,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A path relative to repository root, which is matched non-recursively
/// (will not match subdirectories)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RootFiles,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// A file of patterns to read and include
Include,
/// A file of patterns to match against files under the same directory
SubInclude,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// SubInclude with the result of parsing the included file
///
/// Note: there is no ExpandedInclude because that expansion can be done
/// in place by replacing the Include pattern by the included patterns.
/// SubInclude requires more handling.
///
/// Note: `Box` is used to minimize size impact on other enum variants
ExpandedSubInclude(Box<SubInclude>),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
/// Transforms a glob pattern into a regex
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
let mut input = pat;
let mut res: Vec<u8> = vec![];
let mut group_depth = 0;
while let Some((c, rest)) = input.split_first() {
input = rest;
match c {
b'*' => {
for (source, repl) in GLOB_REPLACEMENTS {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = input.drop_prefix(source) {
input = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 res.extend(*repl);
break;
}
}
}
b'?' => res.extend(b"."),
b'[' => {
match input.iter().skip(1).position(|b| *b == b']') {
None => res.extend(b"\\["),
Some(end) => {
// Account for the one we skipped
let end = end + 1;
res.extend(b"[");
for (i, b) in input[..end].iter().enumerate() {
if *b == b'!' && i == 0 {
res.extend(b"^")
} else if *b == b'^' && i == 0 {
res.extend(b"\\^")
} else if *b == b'\\' {
res.extend(b"\\\\")
} else {
res.push(*b)
}
}
res.extend(b"]");
input = &input[end + 1..];
}
}
}
b'{' => {
group_depth += 1;
res.extend(b"(?:")
}
b'}' if group_depth > 0 => {
group_depth -= 1;
res.extend(b")");
}
b',' if group_depth > 0 => res.extend(b"|"),
b'\\' => {
let c = {
if let Some((c, rest)) = input.split_first() {
input = rest;
c
} else {
c
}
};
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 res.extend(&RE_ESCAPE[*c as usize])
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 _ => res.extend(&RE_ESCAPE[*c as usize]),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
res
}
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
pattern
.iter()
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 .collect()
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 pub fn parse_pattern_syntax(
kind: &[u8],
) -> Result<PatternSyntax, PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 match kind {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 b"re:" => Ok(PatternSyntax::Regexp),
b"path:" => Ok(PatternSyntax::Path),
b"relpath:" => Ok(PatternSyntax::RelPath),
b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
b"relglob:" => Ok(PatternSyntax::RelGlob),
b"relre:" => Ok(PatternSyntax::RelRegexp),
b"glob:" => Ok(PatternSyntax::Glob),
b"rootglob:" => Ok(PatternSyntax::RootGlob),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 b"include:" => Ok(PatternSyntax::Include),
b"subinclude:" => Ok(PatternSyntax::SubInclude),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 _ => Err(PatternError::UnsupportedSyntax(
String::from_utf8_lossy(kind).to_string(),
)),
}
}
/// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
let IgnorePattern {
syntax, pattern, ..
} = entry;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 if pattern.is_empty() {
return vec![];
}
match syntax {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 PatternSyntax::Regexp => pattern.to_owned(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 PatternSyntax::RelRegexp => {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 // The `regex` crate accepts `**` while `re2` and Python's `re`
// do not. Checking for `*` correctly triggers the same error all
// engines.
Raphaël Gomès
rust-regex: prevent nonsensical `.*.*` pattern from happening...
r45348 if pattern[0] == b'^'
|| pattern[0] == b'*'
|| pattern.starts_with(b".*")
{
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 return pattern.to_owned();
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 [&b".*"[..], pattern].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::Path | PatternSyntax::RelPath => {
if pattern == b"." {
return vec![];
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::RootFiles => {
let mut res = if pattern == b"." {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 vec![]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
// Pattern is a directory name.
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 [escape_pattern(pattern).as_slice(), b"/"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 };
// Anything after the pattern must be a non-directory.
res.extend(b"[^/]+$");
res
}
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 PatternSyntax::RelGlob => {
let glob_re = glob_to_re(pattern);
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 [b".*", rest, GLOB_SUFFIX].concat()
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 } else {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 }
PatternSyntax::Glob | PatternSyntax::RootGlob => {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternSyntax::Include
| PatternSyntax::SubInclude
| PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
/// Bytes that `build_single_regex` looks for to decide whether a `rootglob`
/// pattern contains any glob syntax at all; a pattern with none of them is
/// treated as an exact path.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
[b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// Lexically normalizes a `/`-separated path given as bytes.
///
/// Empty and `.` components are dropped and `..` components cancel the
/// preceding component where possible. Leading `..` components of a
/// relative path are kept, while `..` directly under the root is dropped.
/// One or two leading slashes are preserved as-is; three or more collapse
/// to a single slash. An empty result is returned as `.`.
///
/// The filesystem is never consulted.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    if bytes.is_empty() {
        return b".".to_vec();
    }
    let sep = b'/';
    let mut initial_slashes = bytes.iter().take_while(|b| **b == sep).count();
    if initial_slashes > 2 {
        // POSIX allows one or two initial slashes, but treats three or more
        // as single slash.
        initial_slashes = 1;
    }

    let mut components: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|b| *b == sep) {
        if component.is_empty() || component == b"." {
            continue;
        }
        // `..` is kept only when there is nothing it could cancel: at the
        // start of a relative path, or right after another kept `..`.
        let keep = component != b".."
            || (initial_slashes == 0 && components.is_empty())
            || components.last().map_or(false, |last| *last == b"..");
        if keep {
            components.push(component);
        } else {
            // No-op when already at the root.
            components.pop();
        }
    }

    let mut normalized = vec![sep; initial_slashes];
    normalized.extend(components.join(&sep));
    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
pub fn build_single_regex(
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 entry: &IgnorePattern,
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 ) -> Result<Option<Vec<u8>>, PatternError> {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let IgnorePattern {
pattern, syntax, ..
} = entry;
let pattern = match syntax {
PatternSyntax::RootGlob
| PatternSyntax::Path
| PatternSyntax::RelGlob
| PatternSyntax::RootFiles => normalize_path_bytes(&pattern),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include | PatternSyntax::SubInclude => {
return Err(PatternError::NonRegexPattern(entry.clone()))
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 _ => pattern.to_owned(),
};
if *syntax == PatternSyntax::RootGlob
&& !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 Ok(None)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut entry = entry.clone();
entry.pattern = pattern;
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 Ok(Some(_build_single_regex(&entry)))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
lazy_static! {
    /// Maps a syntax name, as written after `syntax:` or as a per-line
    /// `name:` prefix in a pattern file, to the prefix understood by
    /// `parse_pattern_syntax`.
    static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
        const TABLE: &[(&[u8], &[u8])] = &[
            (b"re", b"relre:"),
            (b"regexp", b"relre:"),
            (b"glob", b"relglob:"),
            (b"rootglob", b"rootglob:"),
            (b"include", b"include:"),
            (b"subinclude", b"subinclude:"),
            (b"path", b"path:"),
            (b"rootfilesin", b"rootfilesin:"),
        ];
        let mut m = FastHashMap::default();
        for (name, prefix) in TABLE {
            m.insert(*name, *prefix);
        }
        m
    };
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// Non-fatal problems encountered while reading or parsing a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// (file path, syntax bytes)
    InvalidSyntax(PathBuf, Vec<u8>),
    /// File path
    NoSuchFile(PathBuf),
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn parse_pattern_file_contents(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 lines: &[u8],
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path: &Path,
Raphaël Gomès
rust-filepatterns: allow overriding default syntax...
r50377 default_syntax_override: Option<&[u8]>,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
Raphaël Gomès
rust: do a clippy pass...
r45500
#[allow(clippy::trivial_regex)]
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let comment_escape_regex = Regex::new(r"\\#").unwrap();
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut inputs: Vec<IgnorePattern> = vec![];
let mut warnings: Vec<PatternFileWarning> = vec![];
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: allow overriding default syntax...
r50377 let mut current_syntax =
default_syntax_override.unwrap_or(b"relre:".as_ref());
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let line_number = line_number + 1;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let line_buf;
Yuya Nishihara
rust-filepatterns: use literal b'#' instead of cast
r42858 if line.contains(&b'#') {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 if let Some(cap) = comment_regex.captures(line) {
line = &line[..cap.get(1).unwrap().end()]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
line = &line_buf;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
let mut line = line.trim_end();
if line.is_empty() {
continue;
}
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(syntax) = line.drop_prefix(b"syntax:") {
let syntax = syntax.trim();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
if let Some(rel_syntax) = SYNTAXES.get(syntax) {
current_syntax = rel_syntax;
} else if warn {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 warnings.push(PatternFileWarning::InvalidSyntax(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path.to_owned(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 syntax.to_owned(),
));
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
continue;
}
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 let mut line_syntax: &[u8] = &current_syntax;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
for (s, rels) in SYNTAXES.iter() {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = line.drop_prefix(rels) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 }
if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 line_syntax = rels;
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 line = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 break;
}
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 inputs.push(IgnorePattern::new(
parse_pattern_syntax(&line_syntax).map_err(|e| match e {
PatternError::UnsupportedSyntax(syntax) => {
PatternError::UnsupportedSyntaxInFile(
syntax,
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path.to_string_lossy().into(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 line_number,
)
}
_ => e,
})?,
&line,
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 ));
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Ok((inputs, warnings))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn read_pattern_file(
file_path: &Path,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 match std::fs::read(file_path) {
Ok(contents) => {
inspect_pattern_bytes(&contents);
Raphaël Gomès
rust-filepatterns: allow overriding default syntax...
r50377 parse_pattern_file_contents(&contents, file_path, None, warn)
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 }
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
vec![],
vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
)),
Err(e) => Err(e.into()),
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// How `pattern` is to be interpreted
    pub syntax: PatternSyntax,
    /// The pattern bytes, without any syntax prefix
    pub pattern: Vec<u8>,
    /// Path of the file this pattern was read from
    pub source: PathBuf,
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 impl IgnorePattern {
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Self {
syntax,
pattern: pattern.to_owned(),
Simon Sapin
rust: Make some file path parameters less generic...
r48169 source: source.to_owned(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
}
}
/// Shorthand for a `Result` whose error type is `PatternError`.
pub type PatternResult<T> = Result<T, PatternError>;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// and `subinclude:` patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ///
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
/// is used for the latter to form a tree of patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pub fn get_patterns_from_file(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pattern_file: &Path,
root_dir: &Path,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (patterns, mut warnings) =
read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 let patterns = patterns
.into_iter()
.flat_map(|entry| -> PatternResult<_> {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 Ok(match &entry.syntax {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include => {
let inner_include =
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 root_dir.join(get_path_from_bytes(&entry.pattern));
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (inner_pats, inner_warnings) = get_patterns_from_file(
&inner_include,
root_dir,
inspect_pattern_bytes,
)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 warnings.extend(inner_warnings);
inner_pats
}
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternSyntax::SubInclude => {
let mut sub_include = SubInclude::new(
&root_dir,
&entry.pattern,
&entry.source,
)?;
let (inner_patterns, inner_warnings) =
get_patterns_from_file(
&sub_include.path,
&sub_include.root,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 inspect_pattern_bytes,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 )?;
sub_include.included_patterns = inner_patterns;
warnings.extend(inner_warnings);
vec![IgnorePattern {
syntax: PatternSyntax::ExpandedSubInclude(Box::new(
sub_include,
)),
..entry
}]
}
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 _ => vec![entry],
})
})
.flatten()
.collect();
Ok((patterns, warnings))
}
/// Holds all the information needed to handle a `subinclude:` pattern.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 #[derive(Debug, PartialEq, Eq, Clone)]
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pub struct SubInclude {
/// Will be used for repository (hg) paths that start with this prefix.
/// It is relative to the current working directory, so comparing against
/// repository paths is painless.
pub prefix: HgPathBuf,
/// The file itself, containing the patterns
pub path: PathBuf,
/// Folder in the filesystem where this it applies
pub root: PathBuf,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170
pub included_patterns: Vec<IgnorePattern>,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
impl SubInclude {
pub fn new(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 root_dir: &Path,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pattern: &[u8],
Simon Sapin
rust: Make some file path parameters less generic...
r48169 source: &Path,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ) -> Result<SubInclude, HgPathError> {
let normalized_source =
normalize_path_bytes(&get_bytes_from_path(source));
let source_root = get_path_from_bytes(&normalized_source);
Raphaël Gomès
rust: do a clippy pass...
r45500 let source_root =
source_root.parent().unwrap_or_else(|| source_root.deref());
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
let path = source_root.join(get_path_from_bytes(pattern));
Raphaël Gomès
rust: do a clippy pass...
r45500 let new_root = path.parent().unwrap_or_else(|| path.deref());
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
Simon Sapin
rust: Make some file path parameters less generic...
r48169 let prefix = canonical_path(root_dir, root_dir, new_root)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
Ok(Self {
prefix: path_to_hg_path_buf(prefix).and_then(|mut p| {
if !p.is_empty() {
Arseniy Alekseyev
rhg: more efficient `HgPath::join`...
r49132 p.push_byte(b'/');
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
Ok(p)
})?,
path: path.to_owned(),
root: new_root.to_owned(),
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 included_patterns: Vec::new(),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 })
}
}
/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 pub fn filter_subincludes(
ignore_patterns: Vec<IgnorePattern>,
) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 let mut subincludes = vec![];
let mut others = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 for pattern in ignore_patterns {
if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
{
subincludes.push(sub_include);
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 } else {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 others.push(pattern)
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
}
Ok((subincludes, others))
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// `escape_pattern` must return plain bytes unchanged and escape each of
    /// the special bytes listed below.
    #[test]
    fn escape_pattern_test() {
        let plain =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(plain), plain.to_vec());

        // All escape codes
        let special = br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#;
        let escaped =
            br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#;
        assert_eq!(escape_pattern(special), escaped.to_vec());
    }

    /// Each glob on the left must translate to the regex on the right.
    #[test]
    fn glob_test() {
        let cases: &[(&[u8], &[u8])] = &[
            (br#"?"#, br#"."#),
            (br#"*"#, br#"[^/]*"#),
            (br#"**"#, br#".*"#),
            (br#"**/a"#, br#"(?:.*/)?a"#),
            (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
            (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
            (br#"{a,b}"#, br#"(?:a|b)"#),
            (br#".\*\?"#, br#"\.\*\?"#),
        ];

        for &(glob, expected) in cases {
            assert_eq!(glob_to_re(glob), expected);
        }
    }

    /// Parsing pattern-file contents yields the expected `IgnorePattern`s.
    #[test]
    fn test_parse_pattern_file_contents() {
        let source = Path::new("file_path");
        // Parse `contents` with no default-syntax override and warnings
        // disabled, keeping only the parsed patterns.
        let parse = |contents: &[u8]| {
            parse_pattern_file_contents(contents, source, None, false)
                .unwrap()
                .0
        };
        let rel_glob = |pattern: &[u8]| {
            IgnorePattern::new(PatternSyntax::RelGlob, pattern, source)
        };

        // A `syntax: glob` header applies to the pattern line after it.
        assert_eq!(parse(b"syntax: glob\n*.elc"), vec![rel_glob(b"*.elc")]);

        // Contents made only of `syntax:` lines yield no patterns.
        assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);

        // An inline `glob:` prefix is parsed as a `RelGlob` pattern.
        assert_eq!(parse(b"glob:**.o"), vec![rel_glob(b"**.o")]);
    }

    /// `build_single_regex` output for a relative glob and a verbatim regexp.
    #[test]
    fn test_build_single_regex() {
        let regex_for = |syntax: PatternSyntax, pattern: &[u8]| {
            let ignore_pattern =
                IgnorePattern::new(syntax, pattern, Path::new(""));
            build_single_regex(&ignore_pattern).unwrap()
        };

        assert_eq!(
            regex_for(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        // Regexp syntax is expected to come back exactly as written.
        assert_eq!(
            regex_for(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    /// Root globs without glob metacharacters produce no regex at all, while
    /// one containing a wildcard does.
    #[test]
    fn test_build_single_regex_shortcut() {
        let root_glob_regex = |glob: &[u8]| {
            let ignore_pattern = IgnorePattern::new(
                PatternSyntax::RootGlob,
                glob,
                Path::new(""),
            );
            build_single_regex(&ignore_pattern).unwrap()
        };

        // NOTE(review): presumably exact root globs are matched outside the
        // regex, hence `None` here; confirm against `build_single_regex`.
        assert_eq!(root_glob_regex(b""), None);
        assert_eq!(root_glob_regex(b"whatever"), None);
        assert_eq!(
            root_glob_regex(b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }
}