##// END OF EJS Templates
rust-revlog: rename `end` to `data_end`...
rust-revlog: rename `end` to `data_end` This is more explicit.

File last commit:

r52498:e4b9f8a7 default
r53194:7ffc7155 default
Show More
filepatterns.rs
874 lines | 26.5 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-docstrings: add missing module docstrings...
r42996 // filepatterns.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Handling of Mercurial-specific patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use crate::{
utils::{
files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
SliceExt,
},
FastHashMap, PatternError,
};
Raphaël Gomès
rust: switch hg-core and hg-cpython to rust 2018 edition...
r42828 use lazy_static::lazy_static;
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 use regex::bytes::{NoExpand, Regex};
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 use std::ops::Deref;
Raphaël Gomès
rust-hgpath: replace all paths and filenames with HgPath/HgPathBuf...
r43227 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 use std::vec::Vec;
lazy_static! {
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 static ref RE_ESCAPE: Vec<Vec<u8>> = {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect();
Spencer Baugh
rust: don't escape spaces in regex...
r51752 let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c";
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 for byte in to_escape {
v[*byte as usize].insert(0, b'\\');
}
v
};
}
/// These are matched in order
///
/// Each entry is `(prefix, replacement)`. `glob_to_re` consults this table
/// after it has already consumed one `*`: the remaining input is tested
/// against each prefix in turn and the first hit wins. The final entry has an
/// empty prefix, so it always matches and acts as the fallback for a lone `*`.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
&[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 #[derive(Debug, Clone, PartialEq, Eq)]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 pub enum PatternSyntax {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A regular expression
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Regexp,
/// Glob that matches at the front of the path
RootGlob,
Yuya Nishihara
rust: apply more formatting fixes...
r43109 /// Glob that matches at any suffix of the path (still anchored at
/// slashes)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Glob,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// a path relative to repository root, which is matched recursively
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 Path,
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 /// a single exact path relative to repository root
FilePath,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A path relative to cwd
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelPath,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// an unrooted glob (*.rs matches Rust files in all dirs)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelGlob,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A regexp that needn't match the start of a name
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 RelRegexp,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 /// A path relative to repository root, which is matched non-recursively
/// (will not match subdirectories)
Arseniy Alekseyev
match: rename RootFiles to RootFilesIn for more consistency
r52461 RootFilesIn,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// A file of patterns to read and include
Include,
/// A file of patterns to match against files under the same directory
SubInclude,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// SubInclude with the result of parsing the included file
///
/// Note: there is no ExpandedInclude because that expansion can be done
/// in place by replacing the Include pattern by the included patterns.
/// SubInclude requires more handling.
///
/// Note: `Box` is used to minimize size impact on other enum variants
ExpandedSubInclude(Box<SubInclude>),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
/// Transforms a glob pattern into a regex
Georges Racinet
rust-filepatterns: export glob_to_re function...
r52363 pub fn glob_to_re(pat: &[u8]) -> Vec<u8> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut input = pat;
let mut res: Vec<u8> = vec![];
let mut group_depth = 0;
while let Some((c, rest)) = input.split_first() {
input = rest;
match c {
b'*' => {
for (source, repl) in GLOB_REPLACEMENTS {
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(rest) = input.drop_prefix(source) {
input = rest;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 res.extend(*repl);
break;
}
}
}
b'?' => res.extend(b"."),
b'[' => {
match input.iter().skip(1).position(|b| *b == b']') {
None => res.extend(b"\\["),
Some(end) => {
// Account for the one we skipped
let end = end + 1;
res.extend(b"[");
for (i, b) in input[..end].iter().enumerate() {
if *b == b'!' && i == 0 {
res.extend(b"^")
} else if *b == b'^' && i == 0 {
res.extend(b"\\^")
} else if *b == b'\\' {
res.extend(b"\\\\")
} else {
res.push(*b)
}
}
res.extend(b"]");
input = &input[end + 1..];
}
}
}
b'{' => {
group_depth += 1;
res.extend(b"(?:")
}
b'}' if group_depth > 0 => {
group_depth -= 1;
res.extend(b")");
}
b',' if group_depth > 0 => res.extend(b"|"),
b'\\' => {
let c = {
if let Some((c, rest)) = input.split_first() {
input = rest;
c
} else {
c
}
};
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 res.extend(&RE_ESCAPE[*c as usize])
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 _ => res.extend(&RE_ESCAPE[*c as usize]),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
res
}
fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
pattern
.iter()
Yuya Nishihara
rust-filepatterns: silence warning of non_upper_case_globals
r42683 .flat_map(|c| RE_ESCAPE[*c as usize].clone())
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 .collect()
}
Arseniy Alekseyev
match: simplify the rust-side file pattern kind parsing...
r52498 pub fn parse_pattern_syntax_kind(
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 kind: &[u8],
) -> Result<PatternSyntax, PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 match kind {
Arseniy Alekseyev
match: simplify the rust-side file pattern kind parsing...
r52498 b"re" => Ok(PatternSyntax::Regexp),
b"path" => Ok(PatternSyntax::Path),
b"filepath" => Ok(PatternSyntax::FilePath),
b"relpath" => Ok(PatternSyntax::RelPath),
b"rootfilesin" => Ok(PatternSyntax::RootFilesIn),
b"relglob" => Ok(PatternSyntax::RelGlob),
b"relre" => Ok(PatternSyntax::RelRegexp),
b"glob" => Ok(PatternSyntax::Glob),
b"rootglob" => Ok(PatternSyntax::RootGlob),
b"include" => Ok(PatternSyntax::Include),
b"subinclude" => Ok(PatternSyntax::SubInclude),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 _ => Err(PatternError::UnsupportedSyntax(
String::from_utf8_lossy(kind).to_string(),
)),
}
}
matcher: fix the issue with regex inline-flag in rust oo...
r50499 lazy_static! {
static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 /// Builds the regex that corresponds to the given pattern.
/// If within a `syntax: regexp` context, returns the pattern,
/// otherwise, returns the corresponding regex.
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let IgnorePattern {
syntax, pattern, ..
} = entry;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 if pattern.is_empty() {
return vec![];
}
match syntax {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 PatternSyntax::Regexp => pattern.to_owned(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 PatternSyntax::RelRegexp => {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 // The `regex` crate accepts `**` while `re2` and Python's `re`
// do not. Checking for `*` correctly triggers the same error all
// engines.
Raphaël Gomès
rust-regex: prevent nonsensical `.*.*` pattern from happening...
r45348 if pattern[0] == b'^'
|| pattern[0] == b'*'
|| pattern.starts_with(b".*")
{
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 return pattern.to_owned();
}
matcher: fix the issue with regex inline-flag in rust oo...
r50499 match FLAG_RE.find(pattern) {
Some(mat) => {
let s = mat.start();
let e = mat.end();
[
&b"(?"[..],
&pattern[s + 2..e - 1],
&b":"[..],
matcher: do not prepend '.*' to pattern using ^ after flags...
r50500 if pattern[e] == b'^'
|| pattern[e] == b'*'
|| pattern[e..].starts_with(b".*")
{
&b""[..]
} else {
&b".*"[..]
},
matcher: fix the issue with regex inline-flag in rust oo...
r50499 &pattern[e..],
&b")"[..],
]
.concat()
}
None => [&b".*"[..], pattern].concat(),
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
PatternSyntax::Path | PatternSyntax::RelPath => {
if pattern == b"." {
return vec![];
}
Valentin Gatien-Baron
rustfilepatterns: shorter code for concatenating slices...
r43133 [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Arseniy Alekseyev
match: rename RootFiles to RootFilesIn for more consistency
r52461 PatternSyntax::RootFilesIn => {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let mut res = if pattern == b"." {
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 vec![]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
// Pattern is a directory name.
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 [escape_pattern(pattern).as_slice(), b"/"].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 };
// Anything after the pattern must be a non-directory.
res.extend(b"[^/]+$");
res
}
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 PatternSyntax::RelGlob => {
let glob_re = glob_to_re(pattern);
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 [b".*", rest, glob_suffix].concat()
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 } else {
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Valentin Gatien-Baron
match: simplify the regexps created for glob patterns...
r43132 }
PatternSyntax::Glob | PatternSyntax::RootGlob => {
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 [glob_to_re(pattern).as_slice(), glob_suffix].concat()
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternSyntax::Include
| PatternSyntax::SubInclude
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 | PatternSyntax::ExpandedSubInclude(_)
| PatternSyntax::FilePath => unreachable!(),
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
/// Bytes that carry glob meaning. `build_single_regex` treats a `rootglob`
/// pattern containing none of them as an exact path that needs no regex.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
[b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// TODO support other platforms
/// Lexically normalizes a `/`-separated path given as bytes.
///
/// Empty components and `.` components are dropped, and `..` components are
/// resolved against the preceding component where possible. One or two
/// leading slashes are preserved; three or more collapse to a single one.
/// An input that normalizes to nothing yields `b"."`.
///
/// The filesystem is never consulted.
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';

    if bytes.is_empty() {
        return vec![b'.'];
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading = match bytes
        .iter()
        .position(|&b| b != SEP)
        .unwrap_or(bytes.len())
    {
        count if count > 2 => 1,
        count => count,
    };

    let mut stack: Vec<&[u8]> = Vec::new();
    for part in bytes.split(|&b| b == SEP) {
        if part.is_empty() || part == b"." {
            continue;
        }
        if part != b".." {
            stack.push(part);
            continue;
        }
        // `..` is kept when there is nothing to cancel in a relative path,
        // or when it follows another kept `..`; otherwise it removes the
        // previous component (a no-op at the root of an absolute path).
        let keep = match stack.last() {
            None => leading == 0,
            Some(previous) => *previous == b"..",
        };
        if keep {
            stack.push(part);
        } else {
            stack.pop();
        }
    }

    let mut normalized = vec![SEP; leading];
    for (idx, part) in stack.iter().enumerate() {
        if idx > 0 {
            normalized.push(SEP);
        }
        normalized.extend_from_slice(part);
    }
    if normalized.is_empty() {
        normalized.push(b'.');
    }
    normalized
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs
/// that don't need to be transformed into a regex.
pub fn build_single_regex(
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 entry: &IgnorePattern,
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 glob_suffix: &[u8],
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 ) -> Result<Option<Vec<u8>>, PatternError> {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let IgnorePattern {
pattern, syntax, ..
} = entry;
let pattern = match syntax {
PatternSyntax::RootGlob
| PatternSyntax::Path
| PatternSyntax::RelGlob
Spencer Baugh
rust-filepatterns: also normalize RelPath...
r51751 | PatternSyntax::RelPath
Arseniy Alekseyev
match: rename RootFiles to RootFilesIn for more consistency
r52461 | PatternSyntax::RootFilesIn => normalize_path_bytes(pattern),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include | PatternSyntax::SubInclude => {
return Err(PatternError::NonRegexPattern(entry.clone()))
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 _ => pattern.to_owned(),
};
Raphaël Gomès
match: add `filepath:` pattern to match an exact filepath relative to the root...
r51588 let is_simple_rootglob = *syntax == PatternSyntax::RootGlob
&& !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b));
if is_simple_rootglob || syntax == &PatternSyntax::FilePath {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 Ok(None)
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut entry = entry.clone();
entry.pattern = pattern;
Spencer Baugh
rust: de-hardcode glob_suffix...
r51754 Ok(Some(_build_single_regex(&entry, glob_suffix)))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
lazy_static! {
    /// Maps each recognized pattern prefix (including the trailing colon) to
    /// its syntax. Both `re:` and `regexp:` select `Regexp`.
    static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = {
        let prefixes: &[(&'static [u8], PatternSyntax)] = &[
            (&b"re:"[..], PatternSyntax::Regexp),
            (&b"regexp:"[..], PatternSyntax::Regexp),
            (&b"path:"[..], PatternSyntax::Path),
            (&b"filepath:"[..], PatternSyntax::FilePath),
            (&b"relpath:"[..], PatternSyntax::RelPath),
            (&b"rootfilesin:"[..], PatternSyntax::RootFilesIn),
            (&b"relglob:"[..], PatternSyntax::RelGlob),
            (&b"relre:"[..], PatternSyntax::RelRegexp),
            (&b"glob:"[..], PatternSyntax::Glob),
            (&b"rootglob:"[..], PatternSyntax::RootGlob),
            (&b"include:"[..], PatternSyntax::Include),
            (&b"subinclude:"[..], PatternSyntax::SubInclude),
        ];

        let mut map = FastHashMap::default();
        for (prefix, syntax) in prefixes.iter() {
            map.insert(*prefix, syntax.clone());
        }
        map
    };
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
/// Non-fatal problems reported while reading pattern files.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// A `syntax:` line named an unknown syntax: (file path, syntax bytes).
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file at this path does not exist.
    NoSuchFile(PathBuf),
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Spencer Baugh
rust: simplify pattern file parsing...
r51750 pub fn parse_one_pattern(
pattern: &[u8],
source: &Path,
default: PatternSyntax,
Spencer Baugh
rhg: support "status FILE"...
r51759 normalize: bool,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 ) -> IgnorePattern {
let mut pattern_bytes: &[u8] = pattern;
let mut syntax = default;
for (s, val) in SYNTAXES.iter() {
if let Some(rest) = pattern_bytes.drop_prefix(s) {
syntax = val.clone();
pattern_bytes = rest;
break;
}
}
Spencer Baugh
rhg: support "status FILE"...
r51759 let pattern = match syntax {
PatternSyntax::RootGlob
| PatternSyntax::Path
| PatternSyntax::Glob
| PatternSyntax::RelGlob
| PatternSyntax::RelPath
Arseniy Alekseyev
match: rename RootFiles to RootFilesIn for more consistency
r52461 | PatternSyntax::RootFilesIn
Spencer Baugh
rhg: support "status FILE"...
r51759 if normalize =>
{
normalize_path_bytes(pattern_bytes)
}
_ => pattern_bytes.to_vec(),
};
Spencer Baugh
rust: simplify pattern file parsing...
r51750
IgnorePattern {
syntax,
pattern,
source: source.to_owned(),
}
}
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn parse_pattern_file_contents(
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 lines: &[u8],
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path: &Path,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 default_syntax_override: Option<PatternSyntax>,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 relativize: bool,
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
Raphaël Gomès
rust: do a clippy pass...
r45500
#[allow(clippy::trivial_regex)]
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let comment_escape_regex = Regex::new(r"\\#").unwrap();
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 let mut inputs: Vec<IgnorePattern> = vec![];
let mut warnings: Vec<PatternFileWarning> = vec![];
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Raphaël Gomès
rust-filepatterns: allow overriding default syntax...
r50377 let mut current_syntax =
Spencer Baugh
rust: simplify pattern file parsing...
r51750 default_syntax_override.unwrap_or(PatternSyntax::RelRegexp);
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Spencer Baugh
rust: simplify pattern file parsing...
r51750 for mut line in lines.split(|c| *c == b'\n') {
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 let line_buf;
Yuya Nishihara
rust-filepatterns: use literal b'#' instead of cast
r42858 if line.contains(&b'#') {
Raphaël Gomès
rust-filepatterns: use bytes instead of String...
r42630 if let Some(cap) = comment_regex.captures(line) {
line = &line[..cap.get(1).unwrap().end()]
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Yuya Nishihara
rust-filepatterns: unescape comment character property...
r42859 line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
line = &line_buf;
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Spencer Baugh
rust: simplify pattern file parsing...
r51750 let line = line.trim_end();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
if line.is_empty() {
continue;
}
Valentin Gatien-Baron
rustfilepatterns: refactor the pattern of removing a prefix from a &[u8]...
r43129 if let Some(syntax) = line.drop_prefix(b"syntax:") {
let syntax = syntax.trim();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514
Spencer Baugh
rust: simplify pattern file parsing...
r51750 if let Some(parsed) =
SYNTAXES.get([syntax, &b":"[..]].concat().as_slice())
{
current_syntax = parsed.clone();
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 } else if warn {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 warnings.push(PatternFileWarning::InvalidSyntax(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 file_path.to_owned(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 syntax.to_owned(),
));
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Spencer Baugh
rust: simplify pattern file parsing...
r51750 } else {
let pattern = parse_one_pattern(
line,
file_path,
current_syntax.clone(),
Spencer Baugh
rhg: support "status FILE"...
r51759 false,
Spencer Baugh
rust: simplify pattern file parsing...
r51750 );
inputs.push(if relativize {
pattern.to_relative()
} else {
pattern
})
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Ok((inputs, warnings))
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 }
Spencer Baugh
rhg: support "status FILE"...
r51759 pub fn parse_pattern_args(
patterns: Vec<Vec<u8>>,
cwd: &Path,
root: &Path,
) -> Result<Vec<IgnorePattern>, HgPathError> {
let mut ignore_patterns: Vec<IgnorePattern> = Vec::new();
for pattern in patterns {
let pattern = parse_one_pattern(
&pattern,
Path::new("<args>"),
PatternSyntax::RelPath,
true,
);
match pattern.syntax {
PatternSyntax::RelGlob | PatternSyntax::RelPath => {
let name = get_path_from_bytes(&pattern.pattern);
let canon = canonical_path(root, cwd, name)?;
ignore_patterns.push(IgnorePattern {
syntax: pattern.syntax,
pattern: get_bytes_from_path(canon),
source: pattern.source,
})
}
_ => ignore_patterns.push(pattern.to_owned()),
};
}
Ok(ignore_patterns)
}
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn read_pattern_file(
file_path: &Path,
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
r42514 warn: bool,
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 match std::fs::read(file_path) {
Ok(contents) => {
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes(file_path, &contents);
Spencer Baugh
rust: simplify pattern file parsing...
r51750 parse_pattern_file_contents(&contents, file_path, None, warn, true)
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 }
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
vec![],
vec![PatternFileWarning::NoSuchFile(file_path.to_owned())],
)),
Err(e) => Err(e.into()),
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
/// Represents an entry in an "ignore" file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct IgnorePattern {
    /// How `pattern` should be interpreted.
    pub syntax: PatternSyntax,
    /// The pattern bytes, without any `kind:` prefix.
    pub pattern: Vec<u8>,
    /// Where the pattern came from (a pattern file path, or `<args>` for
    /// command-line patterns).
    pub source: PathBuf,
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 impl IgnorePattern {
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 Self {
syntax,
pattern: pattern.to_owned(),
Simon Sapin
rust: Make some file path parameters less generic...
r48169 source: source.to_owned(),
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
}
Spencer Baugh
rust: simplify pattern file parsing...
r51750
pub fn to_relative(self) -> Self {
let Self {
syntax,
pattern,
source,
} = self;
Self {
syntax: match syntax {
PatternSyntax::Regexp => PatternSyntax::RelRegexp,
PatternSyntax::Glob => PatternSyntax::RelGlob,
x => x,
},
pattern,
source,
}
}
Raphaël Gomès
rust-filepatterns: improve API and robustness for pattern files parsing...
r44784 }
/// Shorthand for a `Result` whose error type is `PatternError`.
pub type PatternResult<T> = Result<T, PatternError>;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// and `subinclude:` patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ///
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
/// is used for the latter to form a tree of patterns.
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pub fn get_patterns_from_file(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 pattern_file: &Path,
root_dir: &Path,
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (patterns, mut warnings) =
read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 let patterns = patterns
.into_iter()
.flat_map(|entry| -> PatternResult<_> {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 Ok(match &entry.syntax {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 PatternSyntax::Include => {
let inner_include =
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 root_dir.join(get_path_from_bytes(&entry.pattern));
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (inner_pats, inner_warnings) = get_patterns_from_file(
&inner_include,
root_dir,
inspect_pattern_bytes,
)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 warnings.extend(inner_warnings);
inner_pats
}
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternSyntax::SubInclude => {
let mut sub_include = SubInclude::new(
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 root_dir,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 &entry.pattern,
&entry.source,
)?;
let (inner_patterns, inner_warnings) =
get_patterns_from_file(
&sub_include.path,
&sub_include.root,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 inspect_pattern_bytes,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 )?;
sub_include.included_patterns = inner_patterns;
warnings.extend(inner_warnings);
vec![IgnorePattern {
syntax: PatternSyntax::ExpandedSubInclude(Box::new(
sub_include,
)),
..entry
}]
}
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 _ => vec![entry],
})
})
.flatten()
.collect();
Ok((patterns, warnings))
}
/// Holds all the information needed to handle a `subinclude:` pattern.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 #[derive(Debug, PartialEq, Eq, Clone)]
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pub struct SubInclude {
/// Will be used for repository (hg) paths that start with this prefix.
/// It is relative to the current working directory, so comparing against
/// repository paths is painless.
pub prefix: HgPathBuf,
/// The file itself, containing the patterns
pub path: PathBuf,
/// Folder in the filesystem where this it applies
pub root: PathBuf,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170
pub included_patterns: Vec<IgnorePattern>,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
impl SubInclude {
pub fn new(
Simon Sapin
rust: Make some file path parameters less generic...
r48169 root_dir: &Path,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 pattern: &[u8],
Simon Sapin
rust: Make some file path parameters less generic...
r48169 source: &Path,
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 ) -> Result<SubInclude, HgPathError> {
let normalized_source =
normalize_path_bytes(&get_bytes_from_path(source));
let source_root = get_path_from_bytes(&normalized_source);
Raphaël Gomès
rust: run a clippy pass with the latest stable version...
r52013 let source_root = source_root.parent().unwrap_or(source_root);
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
let path = source_root.join(get_path_from_bytes(pattern));
Raphaël Gomès
rust: do a clippy pass...
r45500 let new_root = path.parent().unwrap_or_else(|| path.deref());
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
Simon Sapin
rust: Make some file path parameters less generic...
r48169 let prefix = canonical_path(root_dir, root_dir, new_root)?;
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785
Ok(Self {
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 prefix: path_to_hg_path_buf(prefix).map(|mut p| {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 if !p.is_empty() {
Arseniy Alekseyev
rhg: more efficient `HgPath::join`...
r49132 p.push_byte(b'/');
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
Raphaël Gomès
rust-clippy: fix most warnings in `hg-core`...
r50825 p
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 })?,
path: path.to_owned(),
root: new_root.to_owned(),
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 included_patterns: Vec::new(),
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 })
}
}
/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 pub fn filter_subincludes(
ignore_patterns: Vec<IgnorePattern>,
Raphaël Gomès
rust-filepatterns: don't `Box` subincludes unnecessarily...
r50823 ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> {
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 let mut subincludes = vec![];
let mut others = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 for pattern in ignore_patterns {
if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
{
Raphaël Gomès
rust-filepatterns: don't `Box` subincludes unnecessarily...
r50823 subincludes.push(*sub_include);
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 } else {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 others.push(pattern)
Raphaël Gomès
rust-filepatterns: add support for `include` and `subinclude` patterns...
r44785 }
}
Ok((subincludes, others))
}
Raphaël Gomès
rust-filepatterns: add a Rust implementation of pattern-related utils...
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Suffix handed to `build_single_regex` by every regex test below.
    const GLOB_SUFFIX: &[u8] = b"(?:/|$)";

    /// Parses `lines` as the contents of a pattern file named `file_path`,
    /// with the fixed remaining arguments shared by all parsing assertions,
    /// and returns only the parsed patterns (the first tuple element).
    fn parse_test_file(lines: &[u8]) -> Vec<IgnorePattern> {
        parse_pattern_file_contents(
            lines,
            Path::new("file_path"),
            None,
            false,
            true,
        )
        .unwrap()
        .0
    }

    /// Runs `build_single_regex` on a pattern with an empty source path and
    /// the shared `GLOB_SUFFIX`, unwrapping the result.
    fn single_regex(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> Option<Vec<u8>> {
        build_single_regex(
            &IgnorePattern::new(syntax, pattern, Path::new("")),
            GLOB_SUFFIX,
        )
        .unwrap()
    }

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        //
        // Both literals are raw byte strings: `\t`, `\n`, ... below are a
        // backslash followed by a letter, not control characters.
        assert_eq!(
            escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
            br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
        );
    }

    #[test]
    fn glob_test() {
        assert_eq!(glob_to_re(br"?"), br".");
        assert_eq!(glob_to_re(br"*"), br"[^/]*");
        assert_eq!(glob_to_re(br"**"), br".*");
        assert_eq!(glob_to_re(br"**/a"), br"(?:.*/)?a");
        assert_eq!(glob_to_re(br"a/**/b"), br"a/(?:.*/)?b");
        assert_eq!(glob_to_re(br"[a*?!^][^b][!c]"), br"[a*?!^][\^b][^c]");
        assert_eq!(glob_to_re(br"{a,b}"), br"(?:a|b)");
        assert_eq!(glob_to_re(br".\*\?"), br"\.\*\?");
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        // A `syntax:` line applies to the pattern lines that follow it.
        assert_eq!(
            parse_test_file(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax:` lines alone yield no pattern.
        assert_eq!(
            parse_test_file(b"syntax: include\nsyntax: glob"),
            vec![]
        );

        // An inline `glob:` prefix is parsed as a relative glob.
        assert_eq!(
            parse_test_file(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            single_regex(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        // A `Regexp` pattern is expected back verbatim.
        assert_eq!(
            single_regex(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        // These two `RootGlob` patterns are expected to produce no regex.
        assert_eq!(single_regex(PatternSyntax::RootGlob, b""), None);
        assert_eq!(single_regex(PatternSyntax::RootGlob, b"whatever"), None);
        // A `RootGlob` containing a wildcard does produce one.
        assert_eq!(
            single_regex(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_relregex() {
        // Already anchored: returned unchanged.
        assert_eq!(
            single_regex(PatternSyntax::RelRegexp, b"^ba{2}r"),
            Some(b"^ba{2}r".to_vec()),
        );
        // Not anchored: `.*` is expected in front.
        assert_eq!(
            single_regex(PatternSyntax::RelRegexp, b"ba{2}r"),
            Some(b".*ba{2}r".to_vec()),
        );
        // Leading inline flags are expected to wrap the whole pattern.
        assert_eq!(
            single_regex(PatternSyntax::RelRegexp, b"(?ia)ba{2}r"),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
        // Inline flags followed by `^`: no `.*` is expected.
        assert_eq!(
            single_regex(PatternSyntax::RelRegexp, b"(?ia)^ba{2}r"),
            Some(b"(?ia:^ba{2}r)".to_vec()),
        );
    }
}