##// END OF EJS Templates
dirstate-v2: complain early on docket name collision...
dirstate-v2: complain early on docket name collision The alternative is that the dirstate gets deleted so the corruption persists and is hard to investigate. This happened to me in tests, where the dirstate names are taken from file, since the file got reverted. I expect this can also happen in prod with non-trivial probability (1/4 billion).

File last commit:

r50825:e98fd81b default
r50992:ca9d65d6 stable
Show More
matchers.rs
1722 lines | 54.1 KiB | application/rls-services+xml | RustLexer
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 // matchers.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Structs and types for matching files and directories.
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 use crate::{
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 dirstate::dirs_multiset::DirsChildrenMultiset,
filepatterns::{
build_single_regex, filter_subincludes, get_patterns_from_file,
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 PatternFileWarning, PatternResult,
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 },
utils::{
files::find_dirs,
hg_path::{HgPath, HgPathBuf},
Escaped,
},
DirsMultiset, DirstateMapError, FastHashMap, IgnorePattern, PatternError,
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 PatternSyntax,
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 };
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Arseniy Alekseyev
rhg: refactor to use IgnoreFnType alias more widely...
r49177 use crate::dirstate::status::IgnoreFnType;
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 use crate::filepatterns::normalize_path_bytes;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 use std::borrow::ToOwned;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 use std::collections::HashSet;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 use std::fmt::{Display, Error, Formatter};
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 use std::iter::FromIterator;
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 use std::ops::Deref;
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 use std::path::{Path, PathBuf};
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742
Raphaël Gomès
rust-matchers: add timing tracing to regex compilation...
r45288 use micro_timer::timed;
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 #[derive(Debug, PartialEq)]
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 pub enum VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Don't visit anything
Empty,
/// Only visit this directory
This,
/// Visit this directory and these subdirectories
/// TODO Should we implement a `NonEmptyHashSet`?
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 Set(HashSet<HgPathBuf>),
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Visit this directory and all subdirectories
Recursive,
}
Raphaël Gomès
rust: add Debug constraint to Matcher trait...
r50381 pub trait Matcher: core::fmt::Debug {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Explicitly listed files
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 fn file_set(&self) -> Option<&HashSet<HgPathBuf>>;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Returns whether `filename` is in `file_set`
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, filename: &HgPath) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Returns whether `filename` is matched by this matcher
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Decides whether a directory should be visited based on whether it
/// has potential matches in it or one of its subdirectories, and
/// potentially lists which subdirectories of that directory should be
/// visited. This is based on the match's primary, included, and excluded
/// patterns.
///
/// # Example
///
/// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
/// return the following values (assuming the implementation of
/// visit_children_set is capable of recognizing this; some implementations
/// are not).
///
Georges Racinet
rust-matchers: fixing cargo doc...
r44458 /// ```text
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// ```ignore
/// '' -> {'foo', 'qux'}
/// 'baz' -> set()
/// 'foo' -> {'bar'}
/// // Ideally this would be `Recursive`, but since the prefix nature of
/// // matchers is applied to the entire matcher, we have to downgrade this
/// // to `This` due to the (yet to be implemented in Rust) non-prefix
/// // `RootFilesIn'-kind matcher being mixed in.
/// 'foo/bar' -> 'this'
/// 'qux' -> 'this'
/// ```
/// # Important
///
/// Most matchers do not know if they're representing files or
/// directories. They see `['path:dir/f']` and don't know whether `f` is a
/// file or a directory, so `visit_children_set('dir')` for most matchers
/// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
/// a file (like the yet to be implemented in Rust `ExactMatcher` does),
/// it may return `VisitChildrenSet::This`.
/// Do not rely on the return being a `HashSet` indicating that there are
/// no files in this dir to investigate (or equivalently that if there are
/// files to investigate in 'dir' that it will always return
/// `VisitChildrenSet::This`).
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Matcher will match everything and `files_set()` will be empty:
/// optimization might be possible.
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn matches_everything(&self) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 /// Matcher will match exactly the files in `files_set()`: optimization
/// might be possible.
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn is_exact(&self) -> bool;
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
/// Matches everything.
Raphaël Gomès
rust-matchers: add doctests for `AlwaysMatcher`...
r44286 ///```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
Raphaël Gomès
rust-matchers: add doctests for `AlwaysMatcher`...
r44286 /// ```
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 #[derive(Debug)]
pub struct AlwaysMatcher;
impl Matcher for AlwaysMatcher {
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
Raphaël Gomès
rust-matchers: improve `Matcher` trait ergonomics...
r44284 None
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 false
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 true
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 VisitChildrenSet::Recursive
}
Raphaël Gomès
rust-matchers: remove default implementations for `Matcher` trait...
r44009 fn matches_everything(&self) -> bool {
true
}
fn is_exact(&self) -> bool {
false
}
Raphaël Gomès
rust-matchers: add `Matcher` trait and implement `AlwaysMatcher`...
r43742 }
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366
Raphaël Gomès
rust-dirstate: add support for nevermatcher...
r50247 /// Matches nothing.
#[derive(Debug)]
pub struct NeverMatcher;
impl Matcher for NeverMatcher {
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
None
}
fn exact_match(&self, _filename: &HgPath) -> bool {
false
}
fn matches(&self, _filename: &HgPath) -> bool {
false
}
fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet {
VisitChildrenSet::Empty
}
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
true
}
}
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 /// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
///```
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} };
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 ///
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")];
/// let matcher = FileMatcher::new(files).unwrap();
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 ///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 pub struct FileMatcher {
files: HashSet<HgPathBuf>,
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 dirs: DirsMultiset,
}
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 impl FileMatcher {
pub fn new(files: Vec<HgPathBuf>) -> Result<Self, DirstateMapError> {
let dirs = DirsMultiset::from_manifest(&files)?;
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 Ok(Self {
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 files: HashSet::from_iter(files.into_iter()),
dirs,
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 })
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn inner_matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.files.contains(filename.as_ref())
}
}
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 impl Matcher for FileMatcher {
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 Some(&self.files)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.inner_matches(filename)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 self.inner_matches(filename)
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 if self.files.is_empty() || !self.dirs.contains(&directory) {
return VisitChildrenSet::Empty;
}
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 let mut candidates: HashSet<HgPathBuf> =
self.dirs.iter().cloned().collect();
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 candidates.extend(self.files.iter().cloned());
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 candidates.remove(HgPath::new(b""));
if !directory.as_ref().is_empty() {
let directory = [directory.as_ref().as_bytes(), b"/"].concat();
candidates = candidates
.iter()
.filter_map(|c| {
if c.as_bytes().starts_with(&directory) {
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 Some(HgPathBuf::from_bytes(
&c.as_bytes()[directory.len()..],
))
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 } else {
None
}
})
.collect();
}
// `self.dirs` includes all of the directories, recursively, so if
// we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
// 'foo/bar' in it. Thus we can safely ignore a candidate that has a
// '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
// subdir will be in there without a slash.
VisitChildrenSet::Set(
candidates
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 .into_iter()
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 .filter_map(|c| {
if c.bytes().all(|b| *b != b'/') {
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 Some(c)
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 } else {
None
}
})
.collect(),
)
Raphaël Gomès
rust-matchers: add `FileMatcher` implementation...
r44366 }
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
true
}
}
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Matches files that are included in the ignore rules.
Raphaël Gomès
rust: remove support for `re2`...
r45406 /// ```
/// use hg::{
/// matchers::{IncludeMatcher, Matcher},
/// IgnorePattern,
/// PatternSyntax,
/// utils::hg_path::HgPath
/// };
/// use std::path::Path;
/// ///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
Raphaël Gomès
rust: remove support for `re2`...
r45406 /// ///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
/// ```
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 pub struct IncludeMatcher<'a> {
patterns: Vec<u8>,
Arseniy Alekseyev
rhg: refactor to use IgnoreFnType alias more widely...
r49177 match_fn: IgnoreFnType<'a>,
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Whether all the patterns match a prefix (i.e. recursively)
prefix: bool,
roots: HashSet<HgPathBuf>,
dirs: HashSet<HgPathBuf>,
parents: HashSet<HgPathBuf>,
}
Raphaël Gomès
rust: add Debug constraint to Matcher trait...
r50381 impl core::fmt::Debug for IncludeMatcher<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("IncludeMatcher")
.field("patterns", &String::from_utf8_lossy(&self.patterns))
.field("prefix", &self.prefix)
.field("roots", &self.roots)
.field("dirs", &self.dirs)
.field("parents", &self.parents)
.finish()
}
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 impl<'a> Matcher for IncludeMatcher<'a> {
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 None
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn exact_match(&self, _filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 false
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn matches(&self, filename: &HgPath) -> bool {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 (self.match_fn)(filename.as_ref())
}
Raphaël Gomès
rust-matchers: make `Matcher` trait object-safe...
r46182 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let dir = directory.as_ref();
if self.prefix && self.roots.contains(dir) {
return VisitChildrenSet::Recursive;
}
if self.roots.contains(HgPath::new(b""))
|| self.roots.contains(dir)
|| self.dirs.contains(dir)
|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir))
{
return VisitChildrenSet::This;
}
if self.parents.contains(directory.as_ref()) {
let multiset = self.get_all_parents_children();
if let Some(children) = multiset.get(dir) {
Raphaël Gomès
rust: use owned types in `Matcher`...
r50241 return VisitChildrenSet::Set(
children.into_iter().map(HgPathBuf::from).collect(),
);
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
}
VisitChildrenSet::Empty
}
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
false
}
}
Raphaël Gomès
rust: add UnionMatcher...
r50243 /// The union of multiple matchers. Will match if any of the matchers match.
Raphaël Gomès
rust: add Debug constraint to Matcher trait...
r50381 #[derive(Debug)]
Raphaël Gomès
rust: add UnionMatcher...
r50243 pub struct UnionMatcher {
matchers: Vec<Box<dyn Matcher + Sync>>,
}
impl Matcher for UnionMatcher {
    /// There is no explicit file list.
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
        None
    }

    fn exact_match(&self, _filename: &HgPath) -> bool {
        false
    }

    /// True as soon as one of the inner matchers accepts `filename`.
    fn matches(&self, filename: &HgPath) -> bool {
        for matcher in &self.matchers {
            if matcher.matches(filename) {
                return true;
            }
        }
        false
    }

    /// Merges the answers of the inner matchers: `Recursive` wins
    /// immediately, then `This`, then the union of all returned sets.
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
        let mut merged = HashSet::new();
        let mut saw_this = false;

        let answers = self
            .matchers
            .iter()
            .map(|matcher| matcher.visit_children_set(directory));
        for answer in answers {
            match answer {
                VisitChildrenSet::Recursive => {
                    return VisitChildrenSet::Recursive
                }
                // Keep going: a later matcher might still answer
                // `Recursive`.
                VisitChildrenSet::This => saw_this = true,
                VisitChildrenSet::Set(children) => merged.extend(children),
                VisitChildrenSet::Empty => {}
            }
        }

        if saw_this {
            VisitChildrenSet::This
        } else if merged.is_empty() {
            VisitChildrenSet::Empty
        } else {
            VisitChildrenSet::Set(merged)
        }
    }

    fn matches_everything(&self) -> bool {
        // TODO Maybe if all are AlwaysMatcher?
        false
    }

    fn is_exact(&self) -> bool {
        false
    }
}
impl UnionMatcher {
pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self {
Self { matchers }
}
}
Raphaël Gomès
rust: add Debug constraint to Matcher trait...
r50381 #[derive(Debug)]
Raphaël Gomès
rust: add IntersectionMatcher...
r50245 pub struct IntersectionMatcher {
m1: Box<dyn Matcher + Sync>,
m2: Box<dyn Matcher + Sync>,
files: Option<HashSet<HgPathBuf>>,
}
impl Matcher for IntersectionMatcher {
    /// The precomputed file list, if any.
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
        self.files.as_ref()
    }

    /// Whether `filename` is in the precomputed file list.
    fn exact_match(&self, filename: &HgPath) -> bool {
        self.files.as_ref().map_or(false, |f| f.contains(filename))
    }

    /// True only when both inner matchers accept `filename`.
    fn matches(&self, filename: &HgPath) -> bool {
        self.m1.matches(filename) && self.m2.matches(filename)
    }

    /// Combines the answers of both matchers for `directory`.
    ///
    /// `Empty` on either side wins, `Recursive` defers to the other side,
    /// `This` on either side yields `This`, and two sets are intersected
    /// (an empty intersection becomes `Empty`).
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
        let m1_set = self.m1.visit_children_set(directory);
        // Don't even ask the second matcher when the first one already
        // rules the directory out.
        if m1_set == VisitChildrenSet::Empty {
            return VisitChildrenSet::Empty;
        }
        let m2_set = self.m2.visit_children_set(directory);

        // The match is exhaustive, so every combination is handled at
        // compile time without an `unreachable!()` fallback.
        match (m1_set, m2_set) {
            (VisitChildrenSet::Empty, _) | (_, VisitChildrenSet::Empty) => {
                VisitChildrenSet::Empty
            }
            (VisitChildrenSet::Recursive, other)
            | (other, VisitChildrenSet::Recursive) => other,
            (VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => {
                VisitChildrenSet::This
            }
            (VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => {
                // Both sets are owned: move the common elements instead of
                // cloning them.
                let set: HashSet<_> =
                    m1.into_iter().filter(|path| m2.contains(path)).collect();
                if set.is_empty() {
                    VisitChildrenSet::Empty
                } else {
                    VisitChildrenSet::Set(set)
                }
            }
        }
    }

    fn matches_everything(&self) -> bool {
        self.m1.matches_everything() && self.m2.matches_everything()
    }

    fn is_exact(&self) -> bool {
        self.m1.is_exact() || self.m2.is_exact()
    }
}
impl IntersectionMatcher {
    /// Creates the intersection of `m1` and `m2`.
    ///
    /// When at least one matcher is exact, the file list is precomputed as
    /// the exact matcher's files that the other matcher also accepts. The
    /// two matchers are swapped if needed so that the exact one comes first.
    pub fn new(
        mut m1: Box<dyn Matcher + Sync>,
        mut m2: Box<dyn Matcher + Sync>,
    ) -> Self {
        let files = if m1.is_exact() || m2.is_exact() {
            if !m1.is_exact() {
                std::mem::swap(&mut m1, &mut m2);
            }
            m1.file_set().map(|m1_files| {
                // Filter first so that only the surviving paths are cloned.
                m1_files
                    .iter()
                    .filter(|f| m2.matches(f))
                    .cloned()
                    .collect()
            })
        } else {
            None
        };
        Self { m1, m2, files }
    }
}
Raphaël Gomès
rust: add Debug constraint to Matcher trait...
r50381 #[derive(Debug)]
Raphaël Gomès
rust-matchers: implement DifferenceMatcher...
r50373 pub struct DifferenceMatcher {
base: Box<dyn Matcher + Sync>,
excluded: Box<dyn Matcher + Sync>,
files: Option<HashSet<HgPathBuf>>,
}
impl Matcher for DifferenceMatcher {
    /// The precomputed file list, if any.
    fn file_set(&self) -> Option<&HashSet<HgPathBuf>> {
        self.files.as_ref()
    }

    /// Whether `filename` is in the precomputed file list.
    fn exact_match(&self, filename: &HgPath) -> bool {
        match &self.files {
            Some(files) => files.contains(filename),
            None => false,
        }
    }

    /// True when `base` accepts `filename` and `excluded` does not.
    fn matches(&self, filename: &HgPath) -> bool {
        if !self.base.matches(filename) {
            return false;
        }
        !self.excluded.matches(filename)
    }

    /// Combines the answers of `base` and `excluded` for `directory`.
    fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
        match self.excluded.visit_children_set(directory) {
            // Everything below is excluded: nothing to visit.
            VisitChildrenSet::Recursive => VisitChildrenSet::Empty,
            // If excluded has nothing under here that we care about, return
            // base's answer, even if it's 'recursive'.
            VisitChildrenSet::Empty => {
                self.base.visit_children_set(directory)
            }
            // Excluded is 'this' or set(...).
            VisitChildrenSet::This | VisitChildrenSet::Set(_) => {
                match self.base.visit_children_set(directory) {
                    // Never return 'recursive' here if excluded is any kind
                    // of non-empty (either 'this' or set(foo)), since
                    // excluded might return set() for a subdirectory.
                    VisitChildrenSet::This | VisitChildrenSet::Recursive => {
                        VisitChildrenSet::This
                    }
                    // Base is set(...) or set(). We ignore excluded set
                    // results. They're possibly incorrect:
                    //  base = path:dir/subdir
                    //  excluded=rootfilesin:dir,
                    //  visit_children_set(''):
                    //   base returns {'dir'}, excluded returns {'dir'}, if
                    //   we subtracted we'd return set(), which is *not*
                    //   correct, we still need to visit 'dir'!
                    base_set => base_set,
                }
            }
        }
    }

    fn matches_everything(&self) -> bool {
        false
    }

    fn is_exact(&self) -> bool {
        self.base.is_exact()
    }
}
impl DifferenceMatcher {
pub fn new(
base: Box<dyn Matcher + Sync>,
excluded: Box<dyn Matcher + Sync>,
) -> Self {
let base_is_exact = base.is_exact();
let base_files = base.file_set().map(ToOwned::to_owned);
let mut new = Self {
base,
excluded,
files: None,
};
if base_is_exact {
new.files = base_files.map(|files| {
files.iter().cloned().filter(|f| new.matches(f)).collect()
});
}
new
}
}
Raphaël Gomès
rust: create wrapper struct to reduce `regex` contention issues...
r50476 /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded
/// contexts.
///
/// The `status` algorithm makes heavy use of threads, and calling `is_match`
/// from many threads at once is prone to contention, probably within the
/// scratch space needed as the regex DFA is built lazily.
///
/// We are in the process of raising the issue upstream, but for now
/// the workaround used here is to store the `Regex` in a lazily populated
/// thread-local variable, sharing the initial read-only compilation, but
/// not the lazy dfa scratch space mentioned above.
///
/// This reduces the contention observed with 16+ threads, but does not
/// completely remove it. Hopefully this can be addressed upstream.
struct RegexMatcher {
/// Compiled at the start of the status algorithm, used as a base for
/// cloning in each thread-local `self.local`, thus sharing the expensive
/// first compilation.
base: regex::bytes::Regex,
/// Thread-local variable that holds the `Regex` that is actually queried
/// from each thread.
local: thread_local::ThreadLocal<regex::bytes::Regex>,
}
impl RegexMatcher {
    /// Tells whether `path` matches the stored `Regex`, using the calling
    /// thread's own copy (cloned from `base` on first use).
    pub fn is_match(&self, path: &HgPath) -> bool {
        let thread_regex = self.local.get_or(|| self.base.clone());
        thread_regex.is_match(path.as_bytes())
    }
}
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 /// Returns a function that matches an `HgPath` against the given regex
/// pattern.
///
/// This can fail when the pattern is invalid or not supported by the
/// underlying engine (the `regex` crate), for instance anything with
/// back-references.
Raphaël Gomès
rust-matchers: add timing tracing to regex compilation...
r45288 #[timed]
Raphaël Gomès
rust: create wrapper struct to reduce `regex` contention issues...
r50476 fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> {
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 use std::io::Write;
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 // The `regex` crate adds `.*` to the start and end of expressions if there
// are no anchors, so add the start anchor.
let mut escaped_bytes = vec![b'^', b'(', b'?', b':'];
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 for byte in pattern {
if *byte > 127 {
write!(escaped_bytes, "\\x{:x}", *byte).unwrap();
} else {
escaped_bytes.push(*byte);
}
}
Raphaël Gomès
rust-regex: fix issues with regex anchoring and performance...
r45347 escaped_bytes.push(b')');
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084
// Avoid the cost of UTF8 checking
//
// # Safety
// This is safe because we escaped all non-ASCII bytes.
let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) };
let re = regex::bytes::RegexBuilder::new(&pattern_string)
.unicode(false)
Raphaël Gomès
rust-regex: increase the DFA size limit for the `regex` crate...
r45286 // Big repos with big `.hgignore` will hit the default limit and
// incur a significant performance hit. One repo's `hg status` hit
// multiple *minutes*.
.dfa_size_limit(50 * (1 << 20))
Raphaël Gomès
rust-matchers: use the `regex` crate...
r45084 .build()
.map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?;
Raphaël Gomès
rust: create wrapper struct to reduce `regex` contention issues...
r50476 Ok(RegexMatcher {
base: re,
local: Default::default(),
})
Raphaël Gomès
rust-matchers: add function to generate a regex matcher function...
r45006 }
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 /// Returns the regex pattern and a function that matches an `HgPath` against
/// said regex formed by the given ignore patterns.
Arseniy Alekseyev
rhg: refactor to use IgnoreFnType alias more widely...
r49177 fn build_regex_match<'a, 'b>(
ignore_patterns: &'a [IgnorePattern],
) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 let mut regexps = vec![];
let mut exact_set = HashSet::new();
for pattern in ignore_patterns {
if let Some(re) = build_single_regex(pattern)? {
regexps.push(re);
} else {
let exact = normalize_path_bytes(&pattern.pattern);
exact_set.insert(HgPathBuf::from_bytes(&exact));
}
}
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008 let full_regex = regexps.join(&b'|');
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 // An empty pattern would cause the regex engine to incorrectly match the
// (empty) root directory
let func = if !(regexps.is_empty()) {
let matcher = re_matcher(&full_regex)?;
let func = move |filename: &HgPath| {
Raphaël Gomès
rust: create wrapper struct to reduce `regex` contention issues...
r50476 exact_set.contains(filename) || matcher.is_match(filename)
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 };
Arseniy Alekseyev
rhg: refactor to use IgnoreFnType alias more widely...
r49177 Box::new(func) as IgnoreFnType
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 } else {
let func = move |filename: &HgPath| exact_set.contains(filename);
Arseniy Alekseyev
rhg: refactor to use IgnoreFnType alias more widely...
r49177 Box::new(func) as IgnoreFnType
Raphaël Gomès
rust-filepatterns: match exact `rootglob`s with a `HashSet`, not in the regex...
r45311 };
Raphaël Gomès
rust-matchers: add `build_regex_match` function...
r45008
Ok((full_regex, func))
}
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 /// Returns roots and directories corresponding to each pattern.
///
/// This calculates the roots and directories exactly matching the patterns and
/// returns a tuple of (roots, dirs). It does not return other directories
/// which may also need to be considered, like the parent directories.
fn roots_and_dirs(
ignore_patterns: &[IgnorePattern],
) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) {
let mut roots = Vec::new();
let mut dirs = Vec::new();
for ignore_pattern in ignore_patterns {
let IgnorePattern {
syntax, pattern, ..
} = ignore_pattern;
match syntax {
PatternSyntax::RootGlob | PatternSyntax::Glob => {
Arseniy Alekseyev
rhg: more efficient `HgPath::join`...
r49132 let mut root = HgPathBuf::new();
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 for p in pattern.split(|c| *c == b'/') {
if p.iter().any(|c| match *c {
b'[' | b'{' | b'*' | b'?' => true,
_ => false,
}) {
break;
}
Arseniy Alekseyev
rhg: more efficient `HgPath::join`...
r49132 root.push(HgPathBuf::from_bytes(p).as_ref());
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 }
Arseniy Alekseyev
rhg: more efficient `HgPath::join`...
r49132 roots.push(root);
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 }
PatternSyntax::Path | PatternSyntax::RelPath => {
let pat = HgPath::new(if pattern == b"." {
&[] as &[u8]
} else {
pattern
});
roots.push(pat.to_owned());
}
PatternSyntax::RootFiles => {
let pat = if pattern == b"." {
&[] as &[u8]
} else {
pattern
};
dirs.push(HgPathBuf::from_bytes(pat));
}
_ => {
roots.push(HgPathBuf::new());
}
}
}
(roots, dirs)
}
/// The three path sets derived from a list of patterns.
#[derive(Debug, PartialEq)]
struct RootsDirsAndParents {
    /// Directories matched recursively
    pub roots: HashSet<HgPathBuf>,
    /// Directories matched non-recursively
    pub dirs: HashSet<HgPathBuf>,
    /// Directories implicitly needed to reach an item of `roots` or `dirs`
    pub parents: HashSet<HgPathBuf>,
}
/// Extract roots, dirs and parents from patterns.
fn roots_dirs_and_parents(
ignore_patterns: &[IgnorePattern],
) -> PatternResult<RootsDirsAndParents> {
let (roots, dirs) = roots_and_dirs(ignore_patterns);
let mut parents = HashSet::new();
parents.extend(
DirsMultiset::from_manifest(&dirs)
.map_err(|e| match e {
DirstateMapError::InvalidPath(e) => e,
_ => unreachable!(),
})?
.iter()
Raphaël Gomès
rust: do a clippy pass...
r45500 .map(ToOwned::to_owned),
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
parents.extend(
DirsMultiset::from_manifest(&roots)
.map_err(|e| match e {
DirstateMapError::InvalidPath(e) => e,
_ => unreachable!(),
})?
.iter()
Raphaël Gomès
rust: do a clippy pass...
r45500 .map(ToOwned::to_owned),
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 );
Ok(RootsDirsAndParents {
roots: HashSet::from_iter(roots),
dirs: HashSet::from_iter(dirs),
parents,
})
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 /// Returns a function that checks whether a given file (in the general sense)
/// should be matched.
fn build_match<'a, 'b>(
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 ignore_patterns: Vec<IgnorePattern>,
Arseniy Alekseyev
rhg: refactor to use IgnoreFnType alias more widely...
r49177 ) -> PatternResult<(Vec<u8>, IgnoreFnType<'b>)> {
let mut match_funcs: Vec<IgnoreFnType<'b>> = vec![];
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 // For debugging and printing
let mut patterns = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
if !subincludes.is_empty() {
// Build prefix-based matcher functions for subincludes
let mut submatchers = FastHashMap::default();
let mut prefixes = vec![];
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 for sub_include in subincludes {
let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
let match_fn =
Box::new(move |path: &HgPath| matcher.matches(path));
prefixes.push(sub_include.prefix.clone());
submatchers.insert(sub_include.prefix.clone(), match_fn);
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
let match_subinclude = move |filename: &HgPath| {
for prefix in prefixes.iter() {
if let Some(rel) = filename.relative_to(prefix) {
Raphaël Gomès
rust: do a clippy pass...
r45500 if (submatchers[prefix])(rel) {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 return true;
}
}
}
false
};
match_funcs.push(Box::new(match_subinclude));
}
if !ignore_patterns.is_empty() {
// Either do dumb matching if all patterns are rootfiles, or match
// with a regex.
if ignore_patterns
.iter()
.all(|k| k.syntax == PatternSyntax::RootFiles)
{
let dirs: HashSet<_> = ignore_patterns
.iter()
.map(|k| k.pattern.to_owned())
.collect();
let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect();
let match_func = move |path: &HgPath| -> bool {
let path = path.as_bytes();
let i = path.iter().rfind(|a| **a == b'/');
let dir = if let Some(i) = i {
&path[..*i as usize]
} else {
b"."
};
dirs.contains(dir.deref())
};
match_funcs.push(Box::new(match_func));
patterns.extend(b"rootfilesin: ");
dirs_vec.sort();
patterns.extend(dirs_vec.escaped_bytes());
} else {
let (new_re, match_func) = build_regex_match(&ignore_patterns)?;
patterns = new_re;
match_funcs.push(match_func)
}
}
Ok(if match_funcs.len() == 1 {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 (patterns, match_funcs.remove(0))
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 } else {
(
patterns,
Box::new(move |f: &HgPath| -> bool {
match_funcs.iter().any(|match_func| match_func(f))
}),
)
})
}
/// Parses all "ignore" files with their recursive includes and returns a
/// function that checks whether a given file (in the general sense) should be
/// ignored.
Arseniy Alekseyev
rhg: implement the debugignorerhg subcommand...
r49178 pub fn get_ignore_matcher<'a>(
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 mut all_pattern_files: Vec<PathBuf>,
Simon Sapin
rust: Make some file path parameters less generic...
r48169 root_dir: &Path,
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
Arseniy Alekseyev
rhg: implement the debugignorerhg subcommand...
r49178 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let mut all_patterns = vec![];
let mut all_warnings = vec![];
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 // Sort to make the ordering of calls to `inspect_pattern_bytes`
// deterministic even if the ordering of `all_pattern_files` is not (such
// as when a iteration order of a Python dict or Rust HashMap is involved).
// Sort by "string" representation instead of the default by component
// (with a Rust-specific definition of a component)
all_pattern_files
.sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str()));
Simon Sapin
rust: Make some file path parameters less generic...
r48169 for pattern_file in &all_pattern_files {
Simon Sapin
dirstate-v2: Store a hash of ignore patterns (.hgignore)...
r48202 let (patterns, warnings) = get_patterns_from_file(
pattern_file,
root_dir,
inspect_pattern_bytes,
)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Raphaël Gomès
rust-status: only involve ignore mechanism when needed...
r45088 all_patterns.extend(patterns.to_owned());
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 all_warnings.extend(warnings);
}
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let matcher = IncludeMatcher::new(all_patterns)?;
Arseniy Alekseyev
rhg: implement the debugignorerhg subcommand...
r49178 Ok((matcher, all_warnings))
}
/// Parses all "ignore" files with their recursive includes and returns a
/// function that checks whether a given file (in the general sense) should be
/// ignored.
pub fn get_ignore_function<'a>(
all_pattern_files: Vec<PathBuf>,
root_dir: &Path,
Raphaël Gomès
dirstate-v2: hash the source of the ignore patterns as well...
r50453 inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
Arseniy Alekseyev
rhg: implement the debugignorerhg subcommand...
r49178 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
let res =
get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
res.map(|(matcher, all_warnings)| {
let res: IgnoreFnType<'a> =
Box::new(move |path: &HgPath| matcher.matches(path));
(res, all_warnings)
})
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
impl<'a> IncludeMatcher<'a> {
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 let RootsDirsAndParents {
roots,
dirs,
parents,
} = roots_dirs_and_parents(&ignore_patterns)?;
Raphaël Gomès
rust-matchers: fix behavior of `IncludeMatcher` with multiple includes...
r50359 let prefix = ignore_patterns.iter().all(|k| match k.syntax {
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 PatternSyntax::Path | PatternSyntax::RelPath => true,
_ => false,
});
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 let (patterns, match_fn) = build_match(ignore_patterns)?;
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
Simon Sapin
rust: Parse "subinclude"d files along the way, not later...
r48170 Ok(Self {
patterns,
match_fn,
prefix,
roots,
dirs,
parents,
})
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
fn get_all_parents_children(&self) -> DirsChildrenMultiset {
// TODO cache
let thing = self
.dirs
.iter()
.chain(self.roots.iter())
.chain(self.parents.iter());
DirsChildrenMultiset::new(thing, Some(&self.parents))
}
Arseniy Alekseyev
rhg: implement the debugignorerhg subcommand...
r49178
pub fn debug_get_patterns(&self) -> &[u8] {
self.patterns.as_ref()
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 }
impl<'a> Display for IncludeMatcher<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Raphaël Gomès
rust-matchers: add TODO about incomplete `Display` for `IncludeMatcher`...
r45312 // XXX What about exact matches?
// I'm not sure it's worth it to clone the HashSet and keep it
// around just in case someone wants to display the matcher, plus
// it's going to be unreadable after a few entries, but we need to
// inform in this display that exact matches are being used and are
// (on purpose) missing from the `includes`.
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009 write!(
f,
"IncludeMatcher(includes='{}')",
String::from_utf8_lossy(&self.patterns.escaped_bytes())
)
}
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 #[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
Raphaël Gomès
rust-matchers: add functions to get roots, dirs and parents from patterns...
r45007 use std::path::Path;
#[test]
fn test_roots_and_dirs() {
    // Three glob patterns: the last one has no literal leading component
    let globs: [&[u8]; 3] = [b"g/h/*", b"g/h", b"g*"];
    let pats: Vec<IgnorePattern> = globs
        .iter()
        .map(|glob| {
            IgnorePattern::new(PatternSyntax::Glob, glob, Path::new(""))
        })
        .collect();

    let (roots, dirs) = roots_and_dirs(&pats);

    let expected_roots: Vec<HgPathBuf> = vec![
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::new(),
    ];
    assert_eq!(roots, expected_roots);
    assert_eq!(dirs, Vec::<HgPathBuf>::new());
}
#[test]
fn test_roots_dirs_and_parents() {
    let globs: [&[u8]; 3] = [b"g/h/*", b"g/h", b"g*"];
    let pats: Vec<IgnorePattern> = globs
        .iter()
        .map(|glob| {
            IgnorePattern::new(PatternSyntax::Glob, glob, Path::new(""))
        })
        .collect();

    // Duplicate roots collapse in the set; no `RootFiles` pattern means no
    // dirs.
    let expected = RootsDirsAndParents {
        roots: vec![HgPathBuf::from_bytes(b"g/h"), HgPathBuf::new()]
            .into_iter()
            .collect(),
        dirs: HashSet::new(),
        parents: vec![HgPathBuf::new(), HgPathBuf::from_bytes(b"g")]
            .into_iter()
            .collect(),
    };

    assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828
#[test]
fn test_filematcher_visit_children_set() {
    /// Builds the `VisitChildrenSet::Set` made of exactly `names`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Visitchildrenset
    let matcher =
        FileMatcher::new(vec![HgPathBuf::from_bytes(b"dir/subdir/foo.txt")])
            .unwrap();

    // Each directory leading to the file exposes the next path component
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        set_of(&[b"foo.txt"])
    );

    // Nothing to visit anywhere else
    for path in &[&b"dir/subdir/x"[..], b"dir/subdir/foo.txt", b"folder"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
}
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
    /// Builds the `VisitChildrenSet::Set` made of exactly `names`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // No file in a/b/c
    let tracked: [&[u8]; 4] = [
        b"rootfile.txt",
        b"a/file1.txt",
        b"a/b/file2.txt",
        b"a/b/c/d/file4.txt",
    ];
    let files: Vec<HgPathBuf> = tracked
        .iter()
        .map(|file| HgPathBuf::from_bytes(file))
        .collect();
    let matcher = FileMatcher::new(files).unwrap();

    // Every visited directory lists its files and the sub-directories
    // leading to deeper files.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"a", b"rootfile.txt"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"a")),
        set_of(&[b"b", b"file1.txt"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"a/b")),
        set_of(&[b"c", b"file2.txt"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"a/b/c")),
        set_of(&[b"d"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
        set_of(&[b"file4.txt"])
    );

    // Nothing to visit anywhere else
    for path in &[&b"a/b/c/d/e"[..], b"folder"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
}
Raphaël Gomès
rust-matchers: add `IgnoreMatcher`...
r45009
#[test]
fn test_includematcher() {
    /// Builds the `VisitChildrenSet::Set` made of exactly `names`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Builds an `IncludeMatcher` from a single pattern.
    fn include_matcher(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        IncludeMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    }

    // VisitchildrensetPrefix
    let matcher = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    // OPT: This should probably be 'all' if its parent is?
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );

    // VisitchildrensetRootfilesin
    let matcher = include_matcher(PatternSyntax::RootFiles, b"dir/subdir");
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::This
    );
    for path in &[&b"dir/subdir/x"[..], b"folder"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }

    // VisitchildrensetGlob
    let matcher = include_matcher(PatternSyntax::Glob, b"dir/z*");
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::This
    );
    // OPT: these should probably be set().
    for path in &[&b"dir/subdir"[..], b"dir/subdir/x"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::This
        );
    }

    // Test multiple patterns
    let matcher = IncludeMatcher::new(vec![
        IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
        IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
    ])
    .unwrap();
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        VisitChildrenSet::This
    );

    // Test a single glob pattern starting with `**`
    let matcher = include_matcher(PatternSyntax::Glob, b"**/*.exe");
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        VisitChildrenSet::This
    );
}
Raphaël Gomès
rust: add UnionMatcher...
r50243
/// Checks `UnionMatcher::visit_children_set` for three pairs of include
/// matchers: path + rootfilesin, two unrelated paths, and a path with one of
/// its sub-paths.
#[test]
fn test_unionmatcher() {
    /// Builds the `VisitChildrenSet::Set` made of exactly `names`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Builds an `IncludeMatcher` from a single pattern.
    fn include_matcher(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        IncludeMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    }

    // Path + Rootfiles
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include_matcher(PatternSyntax::RootFiles, b"dir");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/foo")),
        VisitChildrenSet::Empty
    );
    // This assertion used to be written twice in a row; once is enough
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );
    // OPT: These next two could be 'all' instead of 'this'.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );

    // Path + unrelated Path
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include_matcher(PatternSyntax::RelPath, b"folder");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"folder", b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/foo")),
        VisitChildrenSet::Empty
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Recursive
    );
    // OPT: These next two could be 'all' instead of 'this'.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
        VisitChildrenSet::This
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );

    // Path + subpath
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/foo")),
        VisitChildrenSet::Empty
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"folder")),
        VisitChildrenSet::Empty
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::Recursive
    );
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/z")),
        VisitChildrenSet::This
    );
}
Raphaël Gomès
rust: add IntersectionMatcher...
r50245
#[test]
fn test_intersectionmatcher() {
    /// Builds the `VisitChildrenSet::Set` made of exactly `names`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Builds a boxed `IncludeMatcher` from a single pattern.
    fn include_matcher(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> Box<IncludeMatcher<'static>> {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new(""),
            )])
            .unwrap(),
        )
    }

    // Include path + Include rootfiles
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include_matcher(PatternSyntax::RootFiles, b"dir");
    let matcher = IntersectionMatcher::new(m1, m2);
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::This
    );
    for path in &[
        &b"dir/subdir"[..],
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }

    // Non intersecting paths
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include_matcher(PatternSyntax::RelPath, b"folder");
    let matcher = IntersectionMatcher::new(m1, m2);
    for path in &[
        &b""[..],
        b"dir",
        b"dir/subdir",
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }

    // Nested paths
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let matcher = IntersectionMatcher::new(m1, m2);
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        set_of(&[b"x"])
    );
    for path in &[&b"dir/foo"[..], b"folder", b"dir/subdir/z"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );

    // Diverging paths
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include_matcher(PatternSyntax::RelPath, b"dir/subdir/z");
    let matcher = IntersectionMatcher::new(m1, m2);
    // OPT: these next two could probably be Empty as well.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    for path in &[
        &b"dir/subdir"[..],
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
}
Raphaël Gomès
rust-matchers: implement DifferenceMatcher...
r50373
#[test]
fn test_differencematcher() {
    /// Builds the `VisitChildrenSet::Set` made of exactly `names`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    /// Builds a boxed `IncludeMatcher` from a single pattern.
    fn include_matcher(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> Box<IncludeMatcher<'static>> {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new("/repo"),
            )])
            .unwrap(),
        )
    }

    // Paths probed against the always/never combinations below
    let all_paths: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/subdir/z",
        b"dir/foo",
        b"dir/subdir/x",
        b"folder",
    ];

    // Two alwaysmatchers should function like a nevermatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(AlwaysMatcher),
    );
    for case in &all_paths {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(case)),
            VisitChildrenSet::Empty
        );
    }

    // One always and one never should behave the same as an always
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(NeverMatcher),
    );
    for case in &all_paths {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(case)),
            VisitChildrenSet::Recursive
        );
    }

    // Two include matchers
    let m1 = include_matcher(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include_matcher(PatternSyntax::RootFiles, b"dir");
    let matcher = DifferenceMatcher::new(m1, m2);
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        set_of(&[b"dir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        set_of(&[b"subdir"])
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    for path in &[&b"dir/foo"[..], b"folder"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
    for path in &[&b"dir/subdir/z"[..], b"dir/subdir/x"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::This
        );
    }
}
Raphaël Gomès
rust-matchers: implement `visit_children_set` for `FileMatcher`...
r44828 }