##// END OF EJS Templates
recover: only apply last journal record per file (issue6423)...
recover: only apply last journal record per file (issue6423) This got broken in 2019 when the size check was introduced. It is most noticable when dealing with transactions that involve an inline to non-inline revlog storage transaction. It wasn't seen as much at the time because the in-memory journal actually de-duplicated the entry implicity, but since 63edc384d3b7 the on-disk journal is used for rollback as well as recover. Differential Revision: https://phab.mercurial-scm.org/D10726

File last commit:

r46182:75f78588 default
r48061:672f48cf default
Show More
matchers.rs
923 lines | 29.1 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 // matchers.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Structs and types for matching files and directories.
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 use crate::{
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 dirstate::dirs_multiset::DirsChildrenMultiset,
filepatterns::{
build_single_regex, filter_subincludes, get_patterns_from_file,
PatternFileWarning, PatternResult, SubInclude,
},
utils::{
files::find_dirs,
hg_path::{HgPath, HgPathBuf},
Escaped,
},
DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 PatternSyntax,
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 };
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 use crate::filepatterns::normalize_path_bytes;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 use std::borrow::ToOwned;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 use std::collections::HashSet;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 use std::fmt::{Display, Error, Formatter};
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 use std::iter::FromIterator;
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 use std::ops::Deref;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742
Raphaël Gomès
rust-matchers: add timing tracing to regex compilation...
r45288 use micro_timer::timed;
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 #[derive(Debug, PartialEq)]
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 pub enum VisitChildrenSet<'a> {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Don't visit anything
Empty,
/// Only visit this directory
This,
/// Visit this directory and these subdirectories
/// TODO Should we implement a `NonEmptyHashSet`?
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 Set(HashSet<&'a HgPath>),
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Visit this directory and all subdirectories
Recursive,
}
pub trait Matcher {
/// Explicitly listed files
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 fn file_set(&self) -> Option<&HashSet<&HgPath>>;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Returns whether `filename` is in `file_set`
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, filename: &HgPath) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Returns whether `filename` is matched by this matcher
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Decides whether a directory should be visited based on whether it
/// has potential matches in it or one of its subdirectories, and
/// potentially lists which subdirectories of that directory should be
/// visited. This is based on the match's primary, included, and excluded
/// patterns.
///
/// # Example
///
/// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
/// return the following values (assuming the implementation of
/// visit_children_set is capable of recognizing this; some implementations
/// are not).
///
Georges Racinet
rust-matchers: fixing cargo doc...
r44458 /// ```text
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// ```ignore
/// '' -> {'foo', 'qux'}
/// 'baz' -> set()
/// 'foo' -> {'bar'}
/// // Ideally this would be `Recursive`, but since the prefix nature of
/// // matchers is applied to the entire matcher, we have to downgrade this
/// // to `This` due to the (yet to be implemented in Rust) non-prefix
/// // `RootFilesIn'-kind matcher being mixed in.
/// 'foo/bar' -> 'this'
/// 'qux' -> 'this'
/// ```
/// # Important
///
/// Most matchers do not know if they're representing files or
/// directories. They see `['path:dir/f']` and don't know whether `f` is a
/// file or a directory, so `visit_children_set('dir')` for most matchers
/// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
/// a file (like the yet to be implemented in Rust `ExactMatcher` does),
/// it may return `VisitChildrenSet::This`.
/// Do not rely on the return being a `HashSet` indicating that there are
/// no files in this dir to investigate (or equivalently that if there are
/// files to investigate in 'dir' that it will always return
/// `VisitChildrenSet::This`).
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Matcher will match everything and `files_set()` will be empty:
/// optimization might be possible.
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn matches_everything(&self) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Matcher will match exactly the files in `files_set()`: optimization
/// might be possible.
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn is_exact(&self) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
/// Matches everything.
Raphaël Gomès
rust-matchers: add doctests for `AlwaysMatcher`...
r44286 ///```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
Raphaël Gomès
rust-matchers: add doctests for `AlwaysMatcher`...
r44286 /// ```
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 #[derive(Debug)]
pub struct AlwaysMatcher;
impl Matcher for AlwaysMatcher {
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 fn file_set(&self) -> Option<&HashSet<&HgPath>> {
None
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 false
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 true
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 VisitChildrenSet::Recursive
}
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn matches_everything(&self) -> bool {
true
}
fn is_exact(&self) -> bool {
false
}
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
///```
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 ///
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 /// let files = [HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 /// let matcher = FileMatcher::new(&files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher<'a> {
files: HashSet<&'a HgPath>,
dirs: DirsMultiset,
}
impl<'a> FileMatcher<'a> {
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 pub fn new(files: &'a [HgPathBuf]) -> Result<Self, DirstateMapError> {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 Ok(Self {
Raphaël Gomès
rust: do a clippy pass...
r45500 files: HashSet::from_iter(files.iter().map(AsRef::as_ref)),
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 dirs: DirsMultiset::from_manifest(files)?,
})
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn inner_matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.files.contains(filename.as_ref())
}
}
impl<'a> Matcher for FileMatcher<'a> {
fn file_set(&self) -> Option<&HashSet<&HgPath>> {
Some(&self.files)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.inner_matches(filename)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.inner_matches(filename)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 if self.files.is_empty() || !self.dirs.contains(&directory) {
return VisitChildrenSet::Empty;
}
Raphaël Gomès
rust: do a clippy pass...
r45500 let dirs_as_set = self.dirs.iter().map(Deref::deref).collect();
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828
let mut candidates: HashSet<&HgPath> =
Raphaël Gomès
rust: do a clippy pass...
r45500 self.files.union(&dirs_as_set).cloned().collect();
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 candidates.remove(HgPath::new(b""));
if !directory.as_ref().is_empty() {
let directory = [directory.as_ref().as_bytes(), b"/"].concat();
candidates = candidates
.iter()
.filter_map(|c| {
if c.as_bytes().starts_with(&directory) {
Some(HgPath::new(&c.as_bytes()[directory.len()..]))
} else {
None
}
})
.collect();
}
// `self.dirs` includes all of the directories, recursively, so if
// we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
// 'foo/bar' in it. Thus we can safely ignore a candidate that has a
// '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
// subdir will be in there without a slash.
VisitChildrenSet::Set(
candidates
.iter()
.filter_map(|c| {
if c.bytes().all(|b| *b != b'/') {
Some(*c)
} else {
None
}
})
.collect(),
)
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 }
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
true
}
}
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Matches files that are included in the ignore rules.
Raphaël Gomès
rust: remove support for `re2`...
r45406 /// ```
/// use hg::{
/// matchers::{IncludeMatcher, Matcher},
/// IgnorePattern,
/// PatternSyntax,
/// utils::hg_path::HgPath
/// };
/// use std::path::Path;
/// ///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
/// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "").unwrap();
/// ///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ```
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 pub struct IncludeMatcher<'a> {
patterns: Vec<u8>,
match_fn: Box<dyn for<'r> Fn(&'r HgPath) -> bool + 'a + Sync>,
/// Whether all the patterns match a prefix (i.e. recursively)
prefix: bool,
roots: HashSet<HgPathBuf>,
dirs: HashSet<HgPathBuf>,
parents: HashSet<HgPathBuf>,
}
impl<'a> Matcher for IncludeMatcher<'a> {
fn file_set(&self) -> Option<&HashSet<&HgPath>> {
None
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 false
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 (self.match_fn)(filename.as_ref())
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let dir = directory.as_ref();
if self.prefix && self.roots.contains(dir) {
return VisitChildrenSet::Recursive;
}
if self.roots.contains(HgPath::new(b""))
|| self.roots.contains(dir)
|| self.dirs.contains(dir)
|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
{
return VisitChildrenSet::This;
}
if self.parents.contains(directory.as_ref()) {
let multiset = self.get_all_parents_children();
if let Some(children) = multiset.get(dir) {
return VisitChildrenSet::Set(children.to_owned());
}
}
VisitChildrenSet::Empty
}
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
false
}
}
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 /// Returns a function that matches an `HgPath` against the given regex
/// pattern.
///
/// This can fail when the pattern is invalid or not supported by the
/// underlying engine (the `regex` crate), for instance anything with
/// back-references.
Raphaël Gomès
rust-matchers: add timing tracing to regex compilation...
r45288 #[timed]
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 fn re_matcher(
pattern: &[u8],
) -> PatternResult<impl Fn(&HgPath) -> bool + Sync> {
use std::io::Write;
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 // The `regex` crate adds `.*` to the start and end of expressions if there
// are no anchors, so add the start anchor.
let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 for byte in pattern {
if *byte > 127 {
write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
} else {
escaped_bytes.push(*byte);
}
}
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 escaped_bytes.push(b')');
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084
// Avoid the cost of UTF8 checking
//
// # Safety
// This is safe because we escaped all non-ASCII bytes.
let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
let re = regex::bytes::RegexBuilder::new(&pattern_string)
.unicode(false)
Raphaël Gomès
rust-regex: increase the DFA size limit for the `regex` crate...
r45286 // Big repos with big `.hgignore` will hit the default limit and
// incur a significant performance hit. One repo's `hg status` hit
// multiple *minutes*.
.dfa_size_limit(50 * (1 << 20))
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 .build()
.map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
Ok(move |path: &HgPath| re.is_match(path.as_bytes()))
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 }
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 /// Returns the regex pattern and a function that matches an `HgPath` against
/// said regex formed by the given ignore patterns.
fn build_regex_match<'a>(
ignore_patterns: &'a [&'a IgnorePattern],
) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 let mut regexps = vec![];
let mut exact_set = HashSet::new();
for pattern in ignore_patterns {
if let Some(re) = build_single_regex(pattern)? {
regexps.push(re);
} else {
let exact = normalize_path_bytes(&pattern.pattern);
exact_set.insert(HgPathBuf::from_bytes(&exact));
}
}
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 let full_regex = regexps.join(&b'|');
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 // An empty pattern would cause the regex engine to incorrectly match the
// (empty) root directory
let func = if !(regexps.is_empty()) {
let matcher = re_matcher(&full_regex)?;
let func = move |filename: &HgPath| {
exact_set.contains(filename) || matcher(filename)
};
Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
} else {
let func = move |filename: &HgPath| exact_set.contains(filename);
Box::new(func) as Box<dyn Fn(&HgPath) -> bool + Sync>
};
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008
Ok((full_regex, func))
}
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 /// Returns roots and directories corresponding to each pattern.
///
/// This calculates the roots and directories exactly matching the patterns and
/// returns a tuple of (roots, dirs). It does not return other directories
/// which may also need to be considered, like the parent directories.
fn roots_and_dirs(
ignore_patterns: &[IgnorePattern],
) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
let mut roots = Vec::new();
let mut dirs = Vec::new();
for ignore_pattern in ignore_patterns {
let IgnorePattern {
syntax, pattern, ..
} = ignore_pattern;
match syntax {
PatternSyntax::RootGlob | PatternSyntax::Glob => {
let mut root = vec![];
for p in pattern.split(|c| *c == b'/') {
if p.iter().any(|c| match *c {
b'[' | b'{' | b'*' | b'?' => true,
_ => false,
}) {
break;
}
root.push(HgPathBuf::from_bytes(p));
}
let buf =
root.iter().fold(HgPathBuf::new(), |acc, r| acc.join(r));
roots.push(buf);
}
PatternSyntax::Path | PatternSyntax::RelPath => {
let pat = HgPath::new(if pattern == b"." {
&[] as &[u8]
} else {
pattern
});
roots.push(pat.to_owned());
}
PatternSyntax::RootFiles => {
let pat = if pattern == b"." {
&[] as &[u8]
} else {
pattern
};
dirs.push(HgPathBuf::from_bytes(pat));
}
_ => {
roots.push(HgPathBuf::new());
}
}
}
(roots, dirs)
}
/// Paths extracted from patterns
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
/// Directories to match recursively
pub roots: HashSet<HgPathBuf>,
/// Directories to match non-recursively
pub dirs: HashSet<HgPathBuf>,
/// Implicitly required directories to go to items in either roots or dirs
pub parents: HashSet<HgPathBuf>,
}
/// Extract roots, dirs and parents from patterns.
fn roots_dirs_and_parents(
ignore_patterns: &[IgnorePattern],
) -> PatternResult<RootsDirsAndParents> {
let (roots, dirs) = roots_and_dirs(ignore_patterns);
let mut parents = HashSet::new();
parents.extend(
DirsMultiset::from_manifest(&dirs)
.map_err(|e| match e {
DirstateMapError::InvalidPath(e) => e,
_ => unreachable!(),
})?
.iter()
Raphaël Gomès
rust: do a clippy pass...
r45500 .map(ToOwned::to_owned),
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
parents.extend(
DirsMultiset::from_manifest(&roots)
.map_err(|e| match e {
DirstateMapError::InvalidPath(e) => e,
_ => unreachable!(),
})?
.iter()
Raphaël Gomès
rust: do a clippy pass...
r45500 .map(ToOwned::to_owned),
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
Ok(RootsDirsAndParents {
roots: HashSet::from_iter(roots),
dirs: HashSet::from_iter(dirs),
parents,
})
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Returns a function that checks whether a given file (in the general sense)
/// should be matched.
fn build_match<'a, 'b>(
ignore_patterns: &'a [IgnorePattern],
root_dir: impl AsRef<Path>,
) -> PatternResult<(
Vec<u8>,
Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
Vec<PatternFileWarning>,
)> {
let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
// For debugging and printing
let mut patterns = vec![];
let mut all_warnings = vec![];
let (subincludes, ignore_patterns) =
filter_subincludes(ignore_patterns, root_dir)?;
if !subincludes.is_empty() {
// Build prefix-based matcher functions for subincludes
let mut submatchers = FastHashMap::default();
let mut prefixes = vec![];
for SubInclude { prefix, root, path } in subincludes.into_iter() {
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 let (match_fn, warnings) =
get_ignore_function(vec![path.to_path_buf()], root)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 all_warnings.extend(warnings);
prefixes.push(prefix.to_owned());
submatchers.insert(prefix.to_owned(), match_fn);
}
let match_subinclude = move |filename: &HgPath| {
for prefix in prefixes.iter() {
if let Some(rel) = filename.relative_to(prefix) {
Raphaël Gomès
rust: do a clippy pass...
r45500 if (submatchers[prefix])(rel) {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 return true;
}
}
}
false
};
match_funcs.push(Box::new(match_subinclude));
}
if !ignore_patterns.is_empty() {
// Either do dumb matching if all patterns are rootfiles, or match
// with a regex.
if ignore_patterns
.iter()
.all(|k| k.syntax == PatternSyntax::RootFiles)
{
let dirs: HashSet<_> = ignore_patterns
.iter()
.map(|k| k.pattern.to_owned())
.collect();
let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
let match_func = move |path: &HgPath| -> bool {
let path = path.as_bytes();
let i = path.iter().rfind(|a| **a == b'/');
let dir = if let Some(i) = i {
&path[..*i as usize]
} else {
b"."
};
dirs.contains(dir.deref())
};
match_funcs.push(Box::new(match_func));
patterns.extend(b"rootfilesin: ");
dirs_vec.sort();
patterns.extend(dirs_vec.escaped_bytes());
} else {
let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
patterns = new_re;
match_funcs.push(match_func)
}
}
Ok(if match_funcs.len() == 1 {
(patterns, match_funcs.remove(0), all_warnings)
} else {
(
patterns,
Box::new(move |f: &HgPath| -> bool {
match_funcs.iter().any(|match_func| match_func(f))
}),
all_warnings,
)
})
}
/// Parses all "ignore" files with their recursive includes and returns a
/// function that checks whether a given file (in the general sense) should be
/// ignored.
pub fn get_ignore_function<'a>(
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 all_pattern_files: Vec<PathBuf>,
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 root_dir: impl AsRef<Path>,
) -> PatternResult<(
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 Box<dyn for<'r> Fn(&'r HgPath) -> bool + Sync + 'a>,
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 Vec<PatternFileWarning>,
)> {
let mut all_patterns = vec![];
let mut all_warnings = vec![];
for pattern_file in all_pattern_files.into_iter() {
let (patterns, warnings) =
get_patterns_from_file(pattern_file, &root_dir)?;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 all_patterns.extend(patterns.to_owned());
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 all_warnings.extend(warnings);
}
let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
all_warnings.extend(warnings);
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 Ok((
Box::new(move |path: &HgPath| matcher.matches(path)),
all_warnings,
))
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
impl<'a> IncludeMatcher<'a> {
pub fn new(
ignore_patterns: Vec<IgnorePattern>,
root_dir: impl AsRef<Path>,
) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
let (patterns, match_fn, warnings) =
build_match(&ignore_patterns, root_dir)?;
let RootsDirsAndParents {
roots,
dirs,
parents,
} = roots_dirs_and_parents(&ignore_patterns)?;
let prefix = ignore_patterns.iter().any(|k| match k.syntax {
PatternSyntax::Path | PatternSyntax::RelPath => true,
_ => false,
});
Ok((
Self {
patterns,
match_fn,
prefix,
roots,
dirs,
parents,
},
warnings,
))
}
fn get_all_parents_children(&self) -> DirsChildrenMultiset {
// TODO cache
let thing = self
.dirs
.iter()
.chain(self.roots.iter())
.chain(self.parents.iter());
DirsChildrenMultiset::new(thing, Some(&self.parents))
}
}
impl<'a> Display for IncludeMatcher<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Raphaël Gomès
rust-matchers: add TODO about incomplete `Display` for `IncludeMatcher`...
r45312 // XXX What about exact matches?
// I'm not sure it's worth it to clone the HashSet and keep it
// around just in case someone wants to display the matcher, plus
// it's going to be unreadable after a few entries, but we need to
// inform in this display that exact matches are being used and are
// (on purpose) missing from the `includes`.
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 write!(
f,
"IncludeMatcher(includes='{}')",
String::from_utf8_lossy(&self.patterns.escaped_bytes())
)
}
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 #[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 use std::path::Path;
#[test]
fn test_roots_and_dirs() {
let pats = vec![
IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
];
let (roots, dirs) = roots_and_dirs(&pats);
assert_eq!(
roots,
vec!(
HgPathBuf::from_bytes(b"g/h"),
HgPathBuf::from_bytes(b"g/h"),
HgPathBuf::new()
),
);
assert_eq!(dirs, vec!());
}
#[test]
fn test_roots_dirs_and_parents() {
let pats = vec![
IgnorePattern::new(PatternSyntax::Glob, b"g/h/*", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g/h", Path::new("")),
IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
];
let mut roots = HashSet::new();
roots.insert(HgPathBuf::from_bytes(b"g/h"));
roots.insert(HgPathBuf::new());
let dirs = HashSet::new();
let mut parents = HashSet::new();
parents.insert(HgPathBuf::new());
parents.insert(HgPathBuf::from_bytes(b"g"));
assert_eq!(
roots_dirs_and_parents(&pats).unwrap(),
Raphaël Gomès
rust-status: refactor options into a `StatusOptions` struct...
r45011 RootsDirsAndParents {
roots,
dirs,
parents
}
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828
#[test]
fn test_filematcher_visit_children_set() {
// Visitchildrenset
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 let files = vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")];
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 let matcher = FileMatcher::new(&files).unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"foo.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
}
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
let files = vec![
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 HgPathBuf::from_bytes(b"rootfile.txt"),
HgPathBuf::from_bytes(b"a/file1.txt"),
HgPathBuf::from_bytes(b"a/b/file2.txt"),
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 // No file in a/b/c
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 ];
let matcher = FileMatcher::new(&files).unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"a"));
set.insert(HgPath::new(b"rootfile.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"b"));
set.insert(HgPath::new(b"file1.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"c"));
set.insert(HgPath::new(b"file2.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"d"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"file4.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
#[test]
fn test_includematcher() {
// VisitchildrensetPrefix
let (matcher, _) = IncludeMatcher::new(
vec![IgnorePattern::new(
PatternSyntax::RelPath,
b"dir/subdir",
Path::new(""),
)],
"",
)
.unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::Recursive
);
// OPT: This should probably be 'all' if its parent is?
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::This
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
// VisitchildrensetRootfilesin
let (matcher, _) = IncludeMatcher::new(
vec![IgnorePattern::new(
PatternSyntax::RootFiles,
b"dir/subdir",
Path::new(""),
)],
"",
)
.unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::This
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
// VisitchildrensetGlob
let (matcher, _) = IncludeMatcher::new(
vec![IgnorePattern::new(
PatternSyntax::Glob,
b"dir/z*",
Path::new(""),
)],
"",
)
.unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::This
);
// OPT: these should probably be set().
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::This
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::This
);
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 }