##// END OF EJS Templates
tests: use sha256line.py instead of /dev/random in test-censor.t (issue6858)...
tests: use sha256line.py instead of /dev/random in test-censor.t (issue6858) Sometimes the systems that run our test suite don't have enough entropy and they cannot produce target file of the expected size using /dev/random, which results in test failures. Switching to /dev/urandom would give us way more available data at the cost of it being less "random", but we don't really need to use entropy for this task at all, since we only care if the file size after compression is big enough to not be stored inline in the revlog. So let's use something that we already have used to generate this kind of data in other tests.

File last commit:

r52013:532e74ad default
r52255:e7be2ddf stable
Show More
filepatterns.rs
874 lines | 26.5 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-docstrings: add missing module docstrings...
r42996 // filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Handling of Mercurial-specific patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use crate::{
utils::{
files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
SliceExt,
},
FastHashMap, PatternError,
};
Raphaël Gomès
rust: switch hg-core and hg-cpython to rust 2018 edition...
r42828 use lazy_static::lazy_static;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 use regex::bytes::{NoExpand, Regex};
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use std::ops::Deref;
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::vec::Vec;
lazy_static! {
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 static ref RE_ESCAPE: Vec<Vec<u8>> = {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
Spencer Baugh
rust: don't escape spaces in regex...
r51752 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 for byte in to_escape {
v[*byte as usize].insert(0, b'\\');
}
v
};
}
/// Replacement table consulted by `glob_to_re` right after it has consumed a
/// `*`: each entry is (prefix of the remaining glob, regex bytes to emit).
///
/// These are matched in order, so the empty prefix at the end is the
/// fallback used for a lone `*`.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
&[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 #[derive(Debug, Clone, PartialEq, Eq)]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 pub enum PatternSyntax {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A regular expression
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Regexp,
/// Glob that matches at the front of the path
RootGlob,
Yuya Nishihara
rust: apply more formatting fixes...
r43109 /// Glob that matches at any suffix of the path (still anchored at
/// slashes)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Glob,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// a path relative to repository root, which is matched recursively
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Path,
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 /// a single exact path relative to repository root
FilePath,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A path relative to cwd
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelPath,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// an unrooted glob (*.rs matches Rust files in all dirs)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelGlob,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A regexp that needn't match the start of a name
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelRegexp,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A path relative to repository root, which is matched non-recursively
/// (will not match subdirectories)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RootFiles,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// A file of patterns to read and include
Include,
/// A file of patterns to match against files under the same directory
SubInclude,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// SubInclude with the result of parsing the included file
///
/// Note: there is no ExpandedInclude because that expansion can be done
/// in place by replacing the Include pattern by the included patterns.
/// SubInclude requires more handling.
///
/// Note: `Box` is used to minimize size impact on other enum variants
ExpandedSubInclude(Box<SubInclude>),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
/// Transforms a glob pattern into a regex
fn glob_to_re(pat: &[u8]) -> Vec<u8> {
let mut input = pat;
let mut res: Vec<u8> = vec![];
let mut group_depth = 0;
while let Some((c, rest)) = input.split_first() {
input = rest;
match c {
b'*' => {
for (source, repl) in GLOB_REPLACEMENTS {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = input.drop_prefix(source) {
input = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 res.extend(*repl);
break;
}
}
}
b'?' => res.extend(b"."),
b'[' => {
match input.iter().skip(1).position(|b| *b == b']') {
None => res.extend(b"\\["),
Some(end) => {
// Account for the one we skipped
let end = end + 1;
res.extend(b"[");
for (i, b) in input[..end].iter().enumerate() {
if *b == b'!' && i == 0 {
res.extend(b"^")
} else if *b == b'^' && i == 0 {
res.extend(b"\\^")
} else if *b == b'\\' {
res.extend(b"\\\\")
} else {
res.push(*b)
}
}
res.extend(b"]");
input = &input[end + 1..];
}
}
}
b'{' => {
group_depth += 1;
res.extend(b"(?:")
}
b'}' if group_depth > 0 => {
group_depth -= 1;
res.extend(b")");
}
b',' if group_depth > 0 => res.extend(b"|"),
b'\\' => {
let c = {
if let Some((c, rest)) = input.split_first() {
input = rest;
c
} else {
c
}
};
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 res.extend(&RE_ESCAPE[*c as usize])
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 _ => res.extend(&RE_ESCAPE[*c as usize]),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
res
}
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
pattern
.iter()
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 .collect()
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 pub fn parse_pattern_syntax(
kind: &[u8],
) -> Result<PatternSyntax, PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 match kind {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 b"re:" => Ok(PatternSyntax::Regexp),
b"path:" => Ok(PatternSyntax::Path),
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 b"filepath:" => Ok(PatternSyntax::FilePath),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 b"relpath:" => Ok(PatternSyntax::RelPath),
b"rootfilesin:" => Ok(PatternSyntax::RootFiles),
b"relglob:" => Ok(PatternSyntax::RelGlob),
b"relre:" => Ok(PatternSyntax::RelRegexp),
b"glob:" => Ok(PatternSyntax::Glob),
b"rootglob:" => Ok(PatternSyntax::RootGlob),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 b"include:" => Ok(PatternSyntax::Include),
b"subinclude:" => Ok(PatternSyntax::SubInclude),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 _ => Err(PatternError::UnsupportedSyntax(
String::from_utf8_lossy(kind).to_string(),
)),
}
}
matcher: fix the issue with regex inline-flag in rust oo...
r50499 lazy_static! {
static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 /// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let IgnorePattern {
syntax, pattern, ..
} = entry;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 if pattern.is_empty() {
return vec![];
}
match syntax {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 PatternSyntax::Regexp => pattern.to_owned(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 PatternSyntax::RelRegexp => {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 // The `regex` crate accepts `**` while `re2` and Python's `re`
// do not. Checking for `*` correctly triggers the same error all
// engines.
Raphaël Gomès
rust-regex: prevent nonsensical `.*.*` pattern from happening...
r45348 if pattern[0] == b'^'
|| pattern[0] == b'*'
|| pattern.starts_with(b".*")
{
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 return pattern.to_owned();
}
matcher: fix the issue with regex inline-flag in rust oo...
r50499 match FLAG_RE.find(pattern) {
Some(mat) => {
let s = mat.start();
let e = mat.end();
[
&b"(?"[..],
&pattern[s + 2..e - 1],
&b":"[..],
matcher: do not prepend '.*' to pattern using ^ after flags...
r50500 if pattern[e] == b'^'
|| pattern[e] == b'*'
|| pattern[e..].starts_with(b".*")
{
&b""[..]
} else {
&b".*"[..]
},
matcher: fix the issue with regex inline-flag in rust oo...
r50499 &pattern[e..],
&b")"[..],
]
.concat()
}
None => [&b".*"[..], pattern].concat(),
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::Path | PatternSyntax::RelPath => {
if pattern == b"." {
return vec![];
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::RootFiles => {
let mut res = if pattern == b"." {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 vec![]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
// Pattern is a directory name.
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 [escape_pattern(pattern).as_slice(), b"/"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 };
// Anything after the pattern must be a non-directory.
res.extend(b"[^/]+$");
res
}
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 PatternSyntax::RelGlob => {
let glob_re = glob_to_re(pattern);
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 [b".*", rest, glob_suffix].concat()
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 } else {
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 }
PatternSyntax::Glob | PatternSyntax::RootGlob => {
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternSyntax::Include
| PatternSyntax::SubInclude
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 | PatternSyntax::ExpandedSubInclude(_)
| PatternSyntax::FilePath => unreachable!(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
/// Bytes whose presence means a glob is not a plain literal path.
/// `build_single_regex` uses this to detect "simple" `rootglob` patterns,
/// for which it does not build a regex.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
[b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// TODO support other platforms
#[cfg(unix)]
/// Lexically normalizes a `/`-separated path given as bytes.
///
/// Empty and `.` components are dropped and `..` components cancel the
/// component before them where possible. A leading run of exactly two
/// slashes is kept, any other run of leading slashes collapses to one.
/// The result is `.` when nothing is left. The filesystem is not consulted.
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';
    const PARENT: &[u8] = b"..";

    if bytes.is_empty() {
        return b".".to_vec();
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading = match bytes.iter().take_while(|b| **b == SEP).count() {
        n if n > 2 => 1,
        n => n,
    };

    let mut stack: Vec<&[u8]> = Vec::new();
    for part in bytes.split(|b| *b == SEP) {
        if part.is_empty() || part == b"." {
            continue;
        }
        // A `..` is kept only when there is nothing it could cancel: at the
        // start of a relative path, or right after another kept `..`.
        let keep = part != PARENT
            || (leading == 0 && stack.is_empty())
            || stack.last().map_or(false, |top| *top == PARENT);
        if keep {
            stack.push(part);
        } else {
            // No-op when the stack is empty (`..` directly under the root).
            stack.pop();
        }
    }

    let mut normalized = vec![SEP; leading];
    for (idx, part) in stack.iter().enumerate() {
        if idx > 0 {
            normalized.push(SEP);
        }
        normalized.extend_from_slice(part);
    }

    if normalized.is_empty() {
        b".".to_vec()
    } else {
        normalized
    }
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
pub fn build_single_regex(
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 entry: &IgnorePattern,
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 glob_suffix: &[u8],
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 ) -> Result<Option<Vec<u8>>, PatternError> {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let IgnorePattern {
pattern, syntax, ..
} = entry;
let pattern = match syntax {
PatternSyntax::RootGlob
| PatternSyntax::Path
| PatternSyntax::RelGlob
Spencer Baugh
rust-filepatterns: also normalize RelPath...
r51751 | PatternSyntax::RelPath
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 | PatternSyntax::RootFiles => normalize_path_bytes(pattern),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include | PatternSyntax::SubInclude => {
return Err(PatternError::NonRegexPattern(entry.clone()))
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 _ => pattern.to_owned(),
};
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
&& !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 Ok(None)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut entry = entry.clone();
entry.pattern = pattern;
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 Ok(Some(_build_single_regex(&entry, glob_suffix)))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
lazy_static! {
    /// Maps every recognized pattern prefix (with its trailing `:`) to the
    /// syntax it selects. Both `re:` and `regexp:` select `Regexp`.
    static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
        let table: [(&'static [u8], PatternSyntax); 12] = [
            (b"re:", PatternSyntax::Regexp),
            (b"regexp:", PatternSyntax::Regexp),
            (b"path:", PatternSyntax::Path),
            (b"filepath:", PatternSyntax::FilePath),
            (b"relpath:", PatternSyntax::RelPath),
            (b"rootfilesin:", PatternSyntax::RootFiles),
            (b"relglob:", PatternSyntax::RelGlob),
            (b"relre:", PatternSyntax::RelRegexp),
            (b"glob:", PatternSyntax::Glob),
            (b"rootglob:", PatternSyntax::RootGlob),
            (b"include:", PatternSyntax::Include),
            (b"subinclude:", PatternSyntax::SubInclude),
        ];

        let mut map = FastHashMap::default();
        for (prefix, syntax) in table.iter() {
            map.insert(*prefix, syntax.clone());
        }
        map
    };
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 #[derive(Debug)]
/// Non-fatal problems reported while reading or parsing a pattern file.
pub enum PatternFileWarning {
/// A `syntax:` line named an unknown syntax: (file path, syntax bytes)
InvalidSyntax(PathBuf, Vec<u8>),
/// The pattern file does not exist: file path
NoSuchFile(PathBuf),
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Spencer Baugh
rust: simplify pattern file parsing...
r51750 pub fn parse_one_pattern(
pattern: &[u8],
source: &Path,
default: PatternSyntax,
Spencer Baugh
rhg: support "status FILE"...
r51759 normalize: bool,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 ) -> IgnorePattern {
let mut pattern_bytes: &[u8] = pattern;
let mut syntax = default;
for (s, val) in SYNTAXES.iter() {
if let Some(rest) = pattern_bytes.drop_prefix(s) {
syntax = val.clone();
pattern_bytes = rest;
break;
}
}
Spencer Baugh
rhg: support "status FILE"...
r51759 let pattern = match syntax {
PatternSyntax::RootGlob
| PatternSyntax::Path
| PatternSyntax::Glob
| PatternSyntax::RelGlob
| PatternSyntax::RelPath
| PatternSyntax::RootFiles
if normalize =>
{
normalize_path_bytes(pattern_bytes)
}
_ => pattern_bytes.to_vec(),
};
Spencer Baugh
rust: simplify pattern file parsing...
r51750
IgnorePattern {
syntax,
pattern,
source: source.to_owned(),
}
}
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn parse_pattern_file_contents(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 lines: &[u8],
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path: &Path,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 default_syntax_override: Option<PatternSyntax>,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 relativize: bool,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
Raphaël Gomès
rust: do a clippy pass...
r45500
#[allow(clippy::trivial_regex)]
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let comment_escape_regex = Regex::new(r"\\#").unwrap();
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut inputs: Vec<IgnorePattern> = vec![];
let mut warnings: Vec<PatternFileWarning> = vec![];
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: allow overriding default syntax...
r50377 let mut current_syntax =
Spencer Baugh
rust: simplify pattern file parsing...
r51750 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Spencer Baugh
rust: simplify pattern file parsing...
r51750 for mut line in lines.split(|c| *c == b'\n') {
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let line_buf;
Yuya Nishihara
rust-filepatterns: use literal b'#' instead of cast
r42858 if line.contains(&b'#') {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 if let Some(cap) = comment_regex.captures(line) {
line = &line[..cap.get(1).unwrap().end()]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
line = &line_buf;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Spencer Baugh
rust: simplify pattern file parsing...
r51750 let line = line.trim_end();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
if line.is_empty() {
continue;
}
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(syntax) = line.drop_prefix(b"syntax:") {
let syntax = syntax.trim();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Spencer Baugh
rust: simplify pattern file parsing...
r51750 if let Some(parsed) =
SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
{
current_syntax = parsed.clone();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else if warn {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 warnings.push(PatternFileWarning::InvalidSyntax(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path.to_owned(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 syntax.to_owned(),
));
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Spencer Baugh
rust: simplify pattern file parsing...
r51750 } else {
let pattern = parse_one_pattern(
line,
file_path,
current_syntax.clone(),
Spencer Baugh
rhg: support "status FILE"...
r51759 false,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 );
inputs.push(if relativize {
pattern.to_relative()
} else {
pattern
})
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Ok((inputs, warnings))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Spencer Baugh
rhg: support "status FILE"...
r51759 pub fn parse_pattern_args(
patterns: Vec<Vec<u8>>,
cwd: &Path,
root: &Path,
) -> Result<Vec<IgnorePattern>, HgPathError> {
let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
for pattern in patterns {
let pattern = parse_one_pattern(
&pattern,
Path::new("<args>"),
PatternSyntax::RelPath,
true,
);
match pattern.syntax {
PatternSyntax::RelGlob | PatternSyntax::RelPath => {
let name = get_path_from_bytes(&pattern.pattern);
let canon = canonical_path(root, cwd, name)?;
ignore_patterns.push(IgnorePattern {
syntax: pattern.syntax,
pattern: get_bytes_from_path(canon),
source: pattern.source,
})
}
_ => ignore_patterns.push(pattern.to_owned()),
};
}
Ok(ignore_patterns)
}
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn read_pattern_file(
file_path: &Path,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 match std::fs::read(file_path) {
Ok(contents) => {
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes(file_path, &contents);
Spencer Baugh
rust: simplify pattern file parsing...
r51750 parse_pattern_file_contents(&contents, file_path, None, warn, true)
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 }
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
vec![],
vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
)),
Err(e) => Err(e.into()),
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// How `pattern` is to be interpreted
    pub syntax: PatternSyntax,
    /// The pattern bytes; `parse_one_pattern` stores them without their
    /// `kind:` prefix
    pub pattern: Vec<u8>,
    /// Where the pattern comes from: the pattern file's path, or `<args>`
    /// for patterns built by `parse_pattern_args`
    pub source: PathBuf,
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 impl IgnorePattern {
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Self {
syntax,
pattern: pattern.to_owned(),
Simon Sapin
rust: Make some file path parameters less generic...
r48169 source: source.to_owned(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
}
Spencer Baugh
rust: simplify pattern file parsing...
r51750
pub fn to_relative(self) -> Self {
let Self {
syntax,
pattern,
source,
} = self;
Self {
syntax: match syntax {
PatternSyntax::Regexp => PatternSyntax::RelRegexp,
PatternSyntax::Glob => PatternSyntax::RelGlob,
x => x,
},
pattern,
source,
}
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
/// Shorthand for a `Result` whose error type is `PatternError`.
pub type PatternResult<T> = Result<T, PatternError>;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// and `subinclude:` patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ///
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
/// is used for the latter to form a tree of patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pub fn get_patterns_from_file(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pattern_file: &Path,
root_dir: &Path,
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (patterns, mut warnings) =
read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 let patterns = patterns
.into_iter()
.flat_map(|entry| -> PatternResult<_> {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 Ok(match &entry.syntax {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include => {
let inner_include =
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 root_dir.join(get_path_from_bytes(&entry.pattern));
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (inner_pats, inner_warnings) = get_patterns_from_file(
&inner_include,
root_dir,
inspect_pattern_bytes,
)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 warnings.extend(inner_warnings);
inner_pats
}
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternSyntax::SubInclude => {
let mut sub_include = SubInclude::new(
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 root_dir,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 &entry.pattern,
&entry.source,
)?;
let (inner_patterns, inner_warnings) =
get_patterns_from_file(
&sub_include.path,
&sub_include.root,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 inspect_pattern_bytes,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 )?;
sub_include.included_patterns = inner_patterns;
warnings.extend(inner_warnings);
vec![IgnorePattern {
syntax: PatternSyntax::ExpandedSubInclude(Box::new(
sub_include,
)),
..entry
}]
}
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 _ => vec![entry],
})
})
.flatten()
.collect();
Ok((patterns, warnings))
}
/// Holds all the information needed to handle a `subinclude:` pattern.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 #[derive(Debug, PartialEq, Eq, Clone)]
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pub struct SubInclude {
/// Will be used for repository (hg) paths that start with this prefix.
/// It is relative to the current working directory, so comparing against
/// repository paths is painless.
pub prefix: HgPathBuf,
/// The file itself, containing the patterns
pub path: PathBuf,
/// Folder in the filesystem where this it applies
pub root: PathBuf,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170
pub included_patterns: Vec<IgnorePattern>,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
impl SubInclude {
pub fn new(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 root_dir: &Path,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pattern: &[u8],
Simon Sapin
rust: Make some file path parameters less generic...
r48169 source: &Path,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ) -> Result<SubInclude, HgPathError> {
let normalized_source =
normalize_path_bytes(&get_bytes_from_path(source));
let source_root = get_path_from_bytes(&normalized_source);
Raphaël Gomès
rust: run a clippy pass with the latest stable version...
r52013 let source_root = source_root.parent().unwrap_or(source_root);
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
let path = source_root.join(get_path_from_bytes(pattern));
Raphaël Gomès
rust: do a clippy pass...
r45500 let new_root = path.parent().unwrap_or_else(|| path.deref());
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
Simon Sapin
rust: Make some file path parameters less generic...
r48169 let prefix = canonical_path(root_dir, root_dir, new_root)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
Ok(Self {
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 if !p.is_empty() {
Arseniy Alekseyev
rhg: more efficient `HgPath::join`...
r49132 p.push_byte(b'/');
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 p
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 })?,
path: path.to_owned(),
root: new_root.to_owned(),
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 included_patterns: Vec::new(),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 })
}
}
/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 pub fn filter_subincludes(
ignore_patterns: Vec<IgnorePattern>,
Raphaël Gomès
rust-filepatterns: don't `Box` subincludes unnecessarily...
r50823 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 let mut subincludes = vec![];
let mut others = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 for pattern in ignore_patterns {
if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
{
Raphaël Gomès
rust-filepatterns: don't `Box` subincludes unnecessarily...
r50823 subincludes.push(*sub_include);
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 } else {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 others.push(pattern)
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
}
Ok((subincludes, others))
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Suffix handed to `build_single_regex` by every test in this module.
    const GLOB_SUFFIX: &[u8] = b"(?:/|$)";

    /// Runs `build_single_regex` on a pattern with an empty source path and
    /// unwraps the result.
    fn regex_for(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
        let entry = IgnorePattern::new(syntax, pattern, Path::new(""));
        build_single_regex(&entry, GLOB_SUFFIX).unwrap()
    }

    #[test]
    fn escape_pattern_test() {
        // Bytes that must come out of `escape_pattern` unchanged.
        let plain =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(plain), plain.to_vec());

        // All escape codes
        let special = br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f";
        let escaped =
            br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f";
        assert_eq!(escape_pattern(special), escaped.to_vec());
    }

    #[test]
    fn glob_test() {
        let check = |glob: &[u8], expected: &[u8]| {
            assert_eq!(
                glob_to_re(glob),
                expected,
                "glob: {}",
                String::from_utf8_lossy(glob)
            );
        };

        check(br"?", br".");
        check(br"*", br"[^/]*");
        check(br"**", br".*");
        check(br"**/a", br"(?:.*/)?a");
        check(br"a/**/b", br"a/(?:.*/)?b");
        check(br"[a*?!^][^b][!c]", br"[a*?!^][\^b][^c]");
        check(br"{a,b}", br"(?:a|b)");
        check(br".\*\?", br"\.\*\?");
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        let source = Path::new("file_path");
        // Parses `lines` as the contents of `file_path` and keeps only the
        // patterns, dropping the warnings.
        let parse = |lines: &[u8]| {
            parse_pattern_file_contents(lines, source, None, false, true)
                .unwrap()
                .0
        };

        assert_eq!(
            parse(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                source
            )],
        );

        assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);

        assert_eq!(
            parse(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                source
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            regex_for(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // These two root globs are expected to yield no regex at all.
        assert_eq!(regex_for(PatternSyntax::RootGlob, b""), None);
        assert_eq!(regex_for(PatternSyntax::RootGlob, b"whatever"), None);

        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_relregex() {
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"^ba{2}r"),
            Some(b"^ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"ba{2}r"),
            Some(b".*ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)ba{2}r"),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)^ba{2}r"),
            Some(b"(?ia:^ba{2}r)".to_vec()),
        );
    }
}