##// END OF EJS Templates
rhg: Port Python’s `ui.configlist` as `Config::get_list`...
rhg: Port Python’s `ui.configlist` as `Config::get_list` This new method is not used yet outside of its own unit tests, so this changeset should make no observable change. The Rust parser implementation attempts to exactly replicate the behavior of the Python one, even in edge cases where that behavior is… surprising. New unit tests capture some of these edge cases. This started as a line-by-line port. The main changes are: * Pass around a parser mode enum instead of parser functions * Inline the whole parser into one function * Use `[u8]::get` which returns an `Option`, instead of indexing after explicitly checking the length. Differential Revision: https://phab.mercurial-scm.org/D11389

File last commit:

r48202:0ef8231e default
r48762:6961eca0 default
Show More
matchers.rs
909 lines | 29.0 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 // matchers.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Structs and types for matching files and directories.
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 use crate::{
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 dirstate::dirs_multiset::DirsChildrenMultiset,
filepatterns::{
build_single_regex, filter_subincludes, get_patterns_from_file,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternFileWarning, PatternResult,
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 },
utils::{
files::find_dirs,
hg_path::{HgPath, HgPathBuf},
Escaped,
},
DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 PatternSyntax,
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 };
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 use crate::filepatterns::normalize_path_bytes;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 use std::borrow::ToOwned;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 use std::collections::HashSet;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 use std::fmt::{Display, Error, Formatter};
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 use std::iter::FromIterator;
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 use std::ops::Deref;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742
Raphaël Gomès
rust-matchers: add timing tracing to regex compilation...
r45288 use micro_timer::timed;
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 #[derive(Debug, PartialEq)]
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 pub enum VisitChildrenSet<'a> {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Don't visit anything
Empty,
/// Only visit this directory
This,
/// Visit this directory and these subdirectories
/// TODO Should we implement a `NonEmptyHashSet`?
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 Set(HashSet<&'a HgPath>),
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Visit this directory and all subdirectories
Recursive,
}
pub trait Matcher {
/// Explicitly listed files
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Returns whether `filename` is in `file_set`
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, filename: &HgPath) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Returns whether `filename` is matched by this matcher
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Decides whether a directory should be visited based on whether it
/// has potential matches in it or one of its subdirectories, and
/// potentially lists which subdirectories of that directory should be
/// visited. This is based on the match's primary, included, and excluded
/// patterns.
///
/// # Example
///
/// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
/// return the following values (assuming the implementation of
/// visit_children_set is capable of recognizing this; some implementations
/// are not).
///
Georges Racinet
rust-matchers: fixing cargo doc...
r44458 /// ```text
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// ```ignore
/// '' -> {'foo', 'qux'}
/// 'baz' -> set()
/// 'foo' -> {'bar'}
/// // Ideally this would be `Recursive`, but since the prefix nature of
/// // matchers is applied to the entire matcher, we have to downgrade this
/// // to `This` due to the (yet to be implemented in Rust) non-prefix
/// // `RootFilesIn'-kind matcher being mixed in.
/// 'foo/bar' -> 'this'
/// 'qux' -> 'this'
/// ```
/// # Important
///
/// Most matchers do not know if they're representing files or
/// directories. They see `['path:dir/f']` and don't know whether `f` is a
/// file or a directory, so `visit_children_set('dir')` for most matchers
/// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
/// a file (like the yet to be implemented in Rust `ExactMatcher` does),
/// it may return `VisitChildrenSet::This`.
/// Do not rely on the return being a `HashSet` indicating that there are
/// no files in this dir to investigate (or equivalently that if there are
/// files to investigate in 'dir' that it will always return
/// `VisitChildrenSet::This`).
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Matcher will match everything and `files_set()` will be empty:
/// optimization might be possible.
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn matches_everything(&self) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Matcher will match exactly the files in `files_set()`: optimization
/// might be possible.
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn is_exact(&self) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
/// Matches everything.
Raphaël Gomès
rust-matchers: add doctests for `AlwaysMatcher`...
r44286 ///```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
Raphaël Gomès
rust-matchers: add doctests for `AlwaysMatcher`...
r44286 /// ```
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 #[derive(Debug)]
pub struct AlwaysMatcher;
impl Matcher for AlwaysMatcher {
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
None
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 false
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 true
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 VisitChildrenSet::Recursive
}
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn matches_everything(&self) -> bool {
true
}
fn is_exact(&self) -> bool {
false
}
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
///```
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 ///
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 /// let files = [HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 /// let matcher = FileMatcher::new(&files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher<'a> {
files: HashSet<&'a HgPath>,
dirs: DirsMultiset,
}
impl<'a> FileMatcher<'a> {
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 pub fn new(files: &'a [HgPathBuf]) -> Result<Self, DirstateMapError> {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 Ok(Self {
Raphaël Gomès
rust: do a clippy pass...
r45500 files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 dirs: DirsMultiset::from_manifest(files)?,
})
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn inner_matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.files.contains(filename.as_ref())
}
}
impl<'a> Matcher for FileMatcher<'a> {
fn file_set(&self) -> Option<&HashSet<&HgPath>> {
Some(&self.files)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.inner_matches(filename)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.inner_matches(filename)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 if self.files.is_empty() || !self.dirs.contains(&directory) {
return VisitChildrenSet::Empty;
}
Raphaël Gomès
rust: do a clippy pass...
r45500 let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828
let mut candidates: HashSet<&HgPath> =
Raphaël Gomès
rust: do a clippy pass...
r45500 self.files.union(&dirs_as_set).cloned().collect();
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 candidates.remove(HgPath::new(b""));
if !directory.as_ref().is_empty() {
let directory = [directory.as_ref().as_bytes(), b"/"].concat();
candidates = candidates
.iter()
.filter_map(|c| {
if c.as_bytes().starts_with(&directory) {
Some(HgPath::new(&c.as_bytes()[directory.len()..]))
} else {
None
}
})
.collect();
}
// `self.dirs` includes all of the directories, recursively, so if
// we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
// 'foo/bar' in it. Thus we can safely ignore a candidate that has a
// '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
// subdir will be in there without a slash.
VisitChildrenSet::Set(
candidates
.iter()
.filter_map(|c| {
if c.bytes().all(|b| *b != b'/') {
Some(*c)
} else {
None
}
})
.collect(),
)
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 }
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
true
}
}
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Matches files that are included in the ignore rules.
Raphaël Gomès
rust: remove support for `re2`...
r45406 /// ```
/// use hg::{
/// matchers::{IncludeMatcher, Matcher},
/// IgnorePattern,
/// PatternSyntax,
/// utils::hg_path::HgPath
/// };
/// use std::path::Path;
/// ///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
Raphaël Gomès
rust: remove support for `re2`...
r45406 /// ///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ```
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 pub struct IncludeMatcher<'a> {
patterns: Vec<u8>,
match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
/// Whether all the patterns match a prefix (i.e. recursively)
prefix: bool,
roots: HashSet<HgPathBuf>,
dirs: HashSet<HgPathBuf>,
parents: HashSet<HgPathBuf>,
}
impl<'a> Matcher for IncludeMatcher<'a> {
fn file_set(&self) -> Option<&HashSet<&HgPath>> {
None
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 false
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 (self.match_fn)(filename.as_ref())
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let dir = directory.as_ref();
if self.prefix && self.roots.contains(dir) {
return VisitChildrenSet::Recursive;
}
if self.roots.contains(HgPath::new(b""))
|| self.roots.contains(dir)
|| self.dirs.contains(dir)
|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
{
return VisitChildrenSet::This;
}
if self.parents.contains(directory.as_ref()) {
let multiset = self.get_all_parents_children();
if let Some(children) = multiset.get(dir) {
return VisitChildrenSet::Set(children.to_owned());
}
}
VisitChildrenSet::Empty
}
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
false
}
}
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 /// Returns a function that matches an `HgPath` against the given regex
/// pattern.
///
/// This can fail when the pattern is invalid or not supported by the
/// underlying engine (the `regex` crate), for instance anything with
/// back-references.
Raphaël Gomès
rust-matchers: add timing tracing to regex compilation...
r45288 #[timed]
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 fn re_matcher(
pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
use std::io::Write;
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 // The `regex` crate adds `.*` to the start and end of expressions if there
// are no anchors, so add the start anchor.
let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 for byte in pattern {
if *byte > 127 {
write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
} else {
escaped_bytes.push(*byte);
}
}
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 escaped_bytes.push(b')');
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084
// Avoid the cost of UTF8 checking
//
// # Safety
// This is safe because we escaped all non-ASCII bytes.
let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
let re = regex::bytes::RegexBuilder::new(&pattern_string)
.unicode(false)
Raphaël Gomès
rust-regex: increase the DFA size limit for the `regex` crate...
r45286 // Big repos with big `.hgignore` will hit the default limit and
// incur a significant performance hit. One repo's `hg status` hit
// multiple *minutes*.
.dfa_size_limit(50 * (1 << 20))
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 .build()
.map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 }
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 /// Returns the regex pattern and a function that matches an `HgPath` against
/// said regex formed by the given ignore patterns.
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 fn build_regex_match(
ignore_patterns: &[IgnorePattern],
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 let mut regexps = vec![];
let mut exact_set = HashSet::new();
for pattern in ignore_patterns {
if let Some(re) = build_single_regex(pattern)? {
regexps.push(re);
} else {
let exact = normalize_path_bytes(&pattern.pattern);
exact_set.insert(HgPathBuf::from_bytes(&exact));
}
}
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 let full_regex = regexps.join(&b'|');
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 // An empty pattern would cause the regex engine to incorrectly match the
// (empty) root directory
let func = if !(regexps.is_empty()) {
let matcher = re_matcher(&full_regex)?;
let func = move |filename: &HgPath| {
exact_set.contains(filename) || matcher(filename)
};
Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
} else {
let func = move |filename: &HgPath| exact_set.contains(filename);
Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
};
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008
Ok((full_regex, func))
}
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 /// Returns roots and directories corresponding to each pattern.
///
/// This calculates the roots and directories exactly matching the patterns and
/// returns a tuple of (roots, dirs). It does not return other directories
/// which may also need to be considered, like the parent directories.
fn roots_and_dirs(
ignore_patterns: &[IgnorePattern],
) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
let mut roots = Vec::new();
let mut dirs = Vec::new();
for ignore_pattern in ignore_patterns {
let IgnorePattern {
syntax, pattern, ..
} = ignore_pattern;
match syntax {
PatternSyntax::RootGlob | PatternSyntax::Glob => {
let mut root = vec![];
for p in pattern.split(|c| *c == b'/') {
if p.iter().any(|c| match *c {
b'[' | b'{' | b'*' | b'?' => true,
_ => false,
}) {
break;
}
root.push(HgPathBuf::from_bytes(p));
}
let buf =
root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
roots.push(buf);
}
PatternSyntax::Path | PatternSyntax::RelPath => {
let pat = HgPath::new(if pattern == b"." {
&[] as &[u8]
} else {
pattern
});
roots.push(pat.to_owned());
}
PatternSyntax::RootFiles => {
let pat = if pattern == b"." {
&[] as &[u8]
} else {
pattern
};
dirs.push(HgPathBuf::from_bytes(pat));
}
_ => {
roots.push(HgPathBuf::new());
}
}
}
(roots, dirs)
}
/// Paths extracted from patterns
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
/// Directories to match recursively
pub roots: HashSet<HgPathBuf>,
/// Directories to match non-recursively
pub dirs: HashSet<HgPathBuf>,
/// Implicitly required directories to go to items in either roots or dirs
pub parents: HashSet<HgPathBuf>,
}
/// Extract roots, dirs and parents from patterns.
fn roots_dirs_and_parents(
ignore_patterns: &[IgnorePattern],
) -> PatternResult<RootsDirsAndParents> {
let (roots, dirs) = roots_and_dirs(ignore_patterns);
let mut parents = HashSet::new();
parents.extend(
DirsMultiset::from_manifest(&dirs)
.map_err(|e| match e {
DirstateMapError::InvalidPath(e) => e,
_ => unreachable!(),
})?
.iter()
Raphaël Gomès
rust: do a clippy pass...
r45500 .map(ToOwned::to_owned),
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
parents.extend(
DirsMultiset::from_manifest(&roots)
.map_err(|e| match e {
DirstateMapError::InvalidPath(e) => e,
_ => unreachable!(),
})?
.iter()
Raphaël Gomès
rust: do a clippy pass...
r45500 .map(ToOwned::to_owned),
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
Ok(RootsDirsAndParents {
roots: HashSet::from_iter(roots),
dirs: HashSet::from_iter(dirs),
parents,
})
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Returns a function that checks whether a given file (in the general sense)
/// should be matched.
fn build_match<'a, 'b>(
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 ignore_patterns: Vec<IgnorePattern>,
) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
// For debugging and printing
let mut patterns = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
if !subincludes.is_empty() {
// Build prefix-based matcher functions for subincludes
let mut submatchers = FastHashMap::default();
let mut prefixes = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 for sub_include in subincludes {
let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
let match_fn =
Box::new(move |path: &HgPath| matcher.matches(path));
prefixes.push(sub_include.prefix.clone());
submatchers.insert(sub_include.prefix.clone(), match_fn);
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
let match_subinclude = move |filename: &HgPath| {
for prefix in prefixes.iter() {
if let Some(rel) = filename.relative_to(prefix) {
Raphaël Gomès
rust: do a clippy pass...
r45500 if (submatchers[prefix])(rel) {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 return true;
}
}
}
false
};
match_funcs.push(Box::new(match_subinclude));
}
if !ignore_patterns.is_empty() {
// Either do dumb matching if all patterns are rootfiles, or match
// with a regex.
if ignore_patterns
.iter()
.all(|k| k.syntax == PatternSyntax::RootFiles)
{
let dirs: HashSet<_> = ignore_patterns
.iter()
.map(|k| k.pattern.to_owned())
.collect();
let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
let match_func = move |path: &HgPath| -> bool {
let path = path.as_bytes();
let i = path.iter().rfind(|a| **a == b'/');
let dir = if let Some(i) = i {
&path[..*i as usize]
} else {
b"."
};
dirs.contains(dir.deref())
};
match_funcs.push(Box::new(match_func));
patterns.extend(b"rootfilesin: ");
dirs_vec.sort();
patterns.extend(dirs_vec.escaped_bytes());
} else {
let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
patterns = new_re;
match_funcs.push(match_func)
}
}
Ok(if match_funcs.len() == 1 {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 (patterns, match_funcs.remove(0))
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 } else {
(
patterns,
Box::new(move |f: &HgPath| -> bool {
match_funcs.iter().any(|match_func| match_func(f))
}),
)
})
}
/// Parses all "ignore" files with their recursive includes and returns a
/// function that checks whether a given file (in the general sense) should be
/// ignored.
pub fn get_ignore_function<'a>(
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 mut all_pattern_files: Vec<PathBuf>,
Simon Sapin
rust: Make some file path parameters less generic...
r48169 root_dir: &Path,
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 inspect_pattern_bytes: &mut impl FnMut(&[u8]),
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 ) -> PatternResult<(
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 Vec<PatternFileWarning>,
)> {
let mut all_patterns = vec![];
let mut all_warnings = vec![];
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 // Sort to make the ordering of calls to `inspect_pattern_bytes`
// deterministic even if the ordering of `all_pattern_files` is not (such
// as when a iteration order of a Python dict or Rust HashMap is involved).
// Sort by "string" representation instead of the default by component
// (with a Rust-specific definition of a component)
all_pattern_files
.sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
Simon Sapin
rust: Make some file path parameters less generic...
r48169 for pattern_file in &all_pattern_files {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (patterns, warnings) = get_patterns_from_file(
pattern_file,
root_dir,
inspect_pattern_bytes,
)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 all_patterns.extend(patterns.to_owned());
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 all_warnings.extend(warnings);
}
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let matcher = IncludeMatcher::new(all_patterns)?;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 Ok((
Box::new(move |path: &HgPath| matcher.matches(path)),
all_warnings,
))
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
impl<'a> IncludeMatcher<'a> {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let RootsDirsAndParents {
roots,
dirs,
parents,
} = roots_dirs_and_parents(&ignore_patterns)?;
let prefix = ignore_patterns.iter().any(|k| match k.syntax {
PatternSyntax::Path | PatternSyntax::RelPath => true,
_ => false,
});
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let (patterns, match_fn) = build_match(ignore_patterns)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 Ok(Self {
patterns,
match_fn,
prefix,
roots,
dirs,
parents,
})
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
fn get_all_parents_children(&self) -> DirsChildrenMultiset {
// TODO cache
let thing = self
.dirs
.iter()
.chain(self.roots.iter())
.chain(self.parents.iter());
DirsChildrenMultiset::new(thing, Some(&self.parents))
}
}
impl<'a> Display for IncludeMatcher<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Raphaël Gomès
rust-matchers: add TODO about incomplete `Display` for `IncludeMatcher`...
r45312 // XXX What about exact matches?
// I'm not sure it's worth it to clone the HashSet and keep it
// around just in case someone wants to display the matcher, plus
// it's going to be unreadable after a few entries, but we need to
// inform in this display that exact matches are being used and are
// (on purpose) missing from the `includes`.
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 write!(
f,
"IncludeMatcher(includes='{}')",
String::from_utf8_lossy(&self.patterns.escaped_bytes())
)
}
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 #[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 use std::path::Path;
#[test]
fn test_roots_and_dirs() {
let pats = vec![
IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
];
let (roots, dirs) = roots_and_dirs(&pats);
assert_eq!(
roots,
vec!(
HgPathBuf::from_bytes(b"g/h"),
HgPathBuf::from_bytes(b"g/h"),
HgPathBuf::new()
),
);
assert_eq!(dirs, vec!());
}
#[test]
fn test_roots_dirs_and_parents() {
let pats = vec![
IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
];
let mut roots = HashSet::new();
roots.insert(HgPathBuf::from_bytes(b"g/h"));
roots.insert(HgPathBuf::new());
let dirs = HashSet::new();
let mut parents = HashSet::new();
parents.insert(HgPathBuf::new());
parents.insert(HgPathBuf::from_bytes(b"g"));
assert_eq!(
roots_dirs_and_parents(&pats).unwrap(),
Raphaël Gomès
rust-status: refactor options into a `StatusOptions` struct...
r45011 RootsDirsAndParents {
roots,
dirs,
parents
}
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828
#[test]
fn test_filematcher_visit_children_set() {
// Visitchildrenset
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 let matcher = FileMatcher::new(&files).unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"foo.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
}
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
let files = vec![
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 HgPathBuf::from_bytes(b"rootfile.txt"),
HgPathBuf::from_bytes(b"a/file1.txt"),
HgPathBuf::from_bytes(b"a/b/file2.txt"),
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 // No file in a/b/c
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 ];
let matcher = FileMatcher::new(&files).unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"a"));
set.insert(HgPath::new(b"rootfile.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"b"));
set.insert(HgPath::new(b"file1.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"c"));
set.insert(HgPath::new(b"file2.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"d"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"file4.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
#[test]
fn test_includematcher() {
// VisitchildrensetPrefix
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
PatternSyntax::RelPath,
b"dir/subdir",
Path::new(""),
)])
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 .unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::Recursive
);
// OPT: This should probably be 'all' if its parent is?
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::This
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
// VisitchildrensetRootfilesin
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
PatternSyntax::RootFiles,
b"dir/subdir",
Path::new(""),
)])
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 .unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::This
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
// VisitchildrensetGlob
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
PatternSyntax::Glob,
b"dir/z*",
Path::new(""),
)])
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 .unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::This
);
// OPT: these should probably be set().
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::This
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::This
);
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 }