matchers.rs
2109 lines
| 66.8 KiB
| application/rls-services+xml
|
RustLexer
Raphaël Gomès
|
r43742 | // matchers.rs | ||
// | ||||
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | ||||
// | ||||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
//! Structs and types for matching files and directories. | ||||
Raphaël Gomès
|
r52002 | use format_bytes::format_bytes; | ||
use once_cell::sync::OnceCell; | ||||
Raphaël Gomès
|
r45006 | use crate::{ | ||
Raphaël Gomès
|
r45009 | dirstate::dirs_multiset::DirsChildrenMultiset, | ||
filepatterns::{ | ||||
build_single_regex, filter_subincludes, get_patterns_from_file, | ||||
Simon Sapin
|
r48170 | PatternFileWarning, PatternResult, | ||
Raphaël Gomès
|
r45009 | }, | ||
utils::{ | ||||
files::find_dirs, | ||||
Spencer Baugh
|
r51753 | hg_path::{HgPath, HgPathBuf, HgPathError}, | ||
Raphaël Gomès
|
r45009 | Escaped, | ||
}, | ||||
Spencer Baugh
|
r51753 | DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax, | ||
Raphaël Gomès
|
r45006 | }; | ||
Raphaël Gomès
|
r45009 | |||
Arseniy Alekseyev
|
r49177 | use crate::dirstate::status::IgnoreFnType; | ||
Raphaël Gomès
|
r45311 | use crate::filepatterns::normalize_path_bytes; | ||
Raphaël Gomès
|
r43742 | use std::collections::HashSet; | ||
Raphaël Gomès
|
r45009 | use std::fmt::{Display, Error, Formatter}; | ||
Raphaël Gomès
|
r45088 | use std::path::{Path, PathBuf}; | ||
Raphaël Gomès
|
r52002 | use std::{borrow::ToOwned, collections::BTreeSet}; | ||
Raphaël Gomès
|
r43742 | |||
Raphaël Gomès
|
r44828 | #[derive(Debug, PartialEq)] | ||
Raphaël Gomès
|
r50241 | pub enum VisitChildrenSet { | ||
Raphaël Gomès
|
r43742 | /// Don't visit anything | ||
Empty, | ||||
/// Only visit this directory | ||||
This, | ||||
/// Visit this directory and these subdirectories | ||||
/// TODO Should we implement a `NonEmptyHashSet`? | ||||
Raphaël Gomès
|
r50241 | Set(HashSet<HgPathBuf>), | ||
Raphaël Gomès
|
r43742 | /// Visit this directory and all subdirectories | ||
Recursive, | ||||
} | ||||
Raphaël Gomès
|
r50381 | pub trait Matcher: core::fmt::Debug { | ||
Raphaël Gomès
|
r43742 | /// Explicitly listed files | ||
Raphaël Gomès
|
r50241 | fn file_set(&self) -> Option<&HashSet<HgPathBuf>>; | ||
Raphaël Gomès
|
r43742 | /// Returns whether `filename` is in `file_set` | ||
Raphaël Gomès
|
r46182 | fn exact_match(&self, filename: &HgPath) -> bool; | ||
Raphaël Gomès
|
r43742 | /// Returns whether `filename` is matched by this matcher | ||
Raphaël Gomès
|
r46182 | fn matches(&self, filename: &HgPath) -> bool; | ||
Raphaël Gomès
|
r43742 | /// Decides whether a directory should be visited based on whether it | ||
/// has potential matches in it or one of its subdirectories, and | ||||
/// potentially lists which subdirectories of that directory should be | ||||
/// visited. This is based on the match's primary, included, and excluded | ||||
/// patterns. | ||||
/// | ||||
/// # Example | ||||
/// | ||||
/// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would | ||||
/// return the following values (assuming the implementation of | ||||
/// visit_children_set is capable of recognizing this; some implementations | ||||
/// are not). | ||||
/// | ||||
Georges Racinet
|
r44458 | /// ```text | ||
Raphaël Gomès
|
r43742 | /// ```ignore | ||
/// '' -> {'foo', 'qux'} | ||||
/// 'baz' -> set() | ||||
/// 'foo' -> {'bar'} | ||||
/// // Ideally this would be `Recursive`, but since the prefix nature of | ||||
/// // matchers is applied to the entire matcher, we have to downgrade this | ||||
/// // to `This` due to the (yet to be implemented in Rust) non-prefix | ||||
/// // `RootFilesIn'-kind matcher being mixed in. | ||||
/// 'foo/bar' -> 'this' | ||||
/// 'qux' -> 'this' | ||||
/// ``` | ||||
/// # Important | ||||
/// | ||||
/// Most matchers do not know if they're representing files or | ||||
/// directories. They see `['path:dir/f']` and don't know whether `f` is a | ||||
/// file or a directory, so `visit_children_set('dir')` for most matchers | ||||
/// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's | ||||
/// a file (like the yet to be implemented in Rust `ExactMatcher` does), | ||||
/// it may return `VisitChildrenSet::This`. | ||||
/// Do not rely on the return being a `HashSet` indicating that there are | ||||
/// no files in this dir to investigate (or equivalently that if there are | ||||
/// files to investigate in 'dir' that it will always return | ||||
/// `VisitChildrenSet::This`). | ||||
Raphaël Gomès
|
r46182 | fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet; | ||
Raphaël Gomès
|
r43742 | /// Matcher will match everything and `files_set()` will be empty: | ||
/// optimization might be possible. | ||||
Raphaël Gomès
|
r44009 | fn matches_everything(&self) -> bool; | ||
Raphaël Gomès
|
r43742 | /// Matcher will match exactly the files in `files_set()`: optimization | ||
/// might be possible. | ||||
Raphaël Gomès
|
r44009 | fn is_exact(&self) -> bool; | ||
Raphaël Gomès
|
r43742 | } | ||
/// Matches everything. | ||||
Raphaël Gomès
|
r44286 | ///``` | ||
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath }; | ||||
/// | ||||
/// let matcher = AlwaysMatcher; | ||||
/// | ||||
Raphaël Gomès
|
r44366 | /// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true); | ||
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); | ||||
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true); | ||||
Raphaël Gomès
|
r44286 | /// ``` | ||
Raphaël Gomès
|
r43742 | #[derive(Debug)] | ||
pub struct AlwaysMatcher; | ||||
impl Matcher for AlwaysMatcher { | ||||
Raphaël Gomès
|
r50241 | fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||
Raphaël Gomès
|
r44284 | None | ||
Raphaël Gomès
|
r43742 | } | ||
Raphaël Gomès
|
r46182 | fn exact_match(&self, _filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r44009 | false | ||
} | ||||
Raphaël Gomès
|
r46182 | fn matches(&self, _filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r44009 | true | ||
} | ||||
Raphaël Gomès
|
r46182 | fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet { | ||
Raphaël Gomès
|
r43742 | VisitChildrenSet::Recursive | ||
} | ||||
Raphaël Gomès
|
r44009 | fn matches_everything(&self) -> bool { | ||
true | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
false | ||||
} | ||||
Raphaël Gomès
|
r43742 | } | ||
Raphaël Gomès
|
r44366 | |||
Raphaël Gomès
|
r50247 | /// Matches nothing. | ||
#[derive(Debug)] | ||||
pub struct NeverMatcher; | ||||
impl Matcher for NeverMatcher { | ||||
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||||
None | ||||
} | ||||
fn exact_match(&self, _filename: &HgPath) -> bool { | ||||
false | ||||
} | ||||
fn matches(&self, _filename: &HgPath) -> bool { | ||||
false | ||||
} | ||||
fn visit_children_set(&self, _directory: &HgPath) -> VisitChildrenSet { | ||||
VisitChildrenSet::Empty | ||||
} | ||||
fn matches_everything(&self) -> bool { | ||||
false | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
true | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r44366 | /// Matches the input files exactly. They are interpreted as paths, not | ||
/// patterns. | ||||
/// | ||||
///``` | ||||
Raphaël Gomès
|
r46182 | /// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::{HgPath, HgPathBuf} }; | ||
Raphaël Gomès
|
r44366 | /// | ||
Raphaël Gomès
|
r50241 | /// let files = vec![HgPathBuf::from_bytes(b"a.txt"), HgPathBuf::from_bytes(br"re:.*\.c$")]; | ||
/// let matcher = FileMatcher::new(files).unwrap(); | ||||
Raphaël Gomès
|
r44366 | /// | ||
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false); | ||||
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true); | ||||
/// ``` | ||||
#[derive(Debug)] | ||||
Raphaël Gomès
|
r50241 | pub struct FileMatcher { | ||
files: HashSet<HgPathBuf>, | ||||
Raphaël Gomès
|
r44366 | dirs: DirsMultiset, | ||
Raphaël Gomès
|
r52002 | sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>, | ||
Raphaël Gomès
|
r44366 | } | ||
Raphaël Gomès
|
r50241 | impl FileMatcher { | ||
Spencer Baugh
|
r51753 | pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> { | ||
Raphaël Gomès
|
r50241 | let dirs = DirsMultiset::from_manifest(&files)?; | ||
Raphaël Gomès
|
r44366 | Ok(Self { | ||
Raphaël Gomès
|
r52013 | files: HashSet::from_iter(files), | ||
Raphaël Gomès
|
r50241 | dirs, | ||
Raphaël Gomès
|
r52002 | sorted_visitchildrenset_candidates: OnceCell::new(), | ||
Raphaël Gomès
|
r44366 | }) | ||
} | ||||
Raphaël Gomès
|
r46182 | fn inner_matches(&self, filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r44366 | self.files.contains(filename.as_ref()) | ||
} | ||||
} | ||||
Raphaël Gomès
|
r50241 | impl Matcher for FileMatcher { | ||
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||||
Raphaël Gomès
|
r44366 | Some(&self.files) | ||
} | ||||
Raphaël Gomès
|
r46182 | fn exact_match(&self, filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r44366 | self.inner_matches(filename) | ||
} | ||||
Raphaël Gomès
|
r46182 | fn matches(&self, filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r44366 | self.inner_matches(filename) | ||
} | ||||
Raphaël Gomès
|
r46182 | fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { | ||
Raphaël Gomès
|
r52002 | if self.files.is_empty() || !self.dirs.contains(directory) { | ||
Raphaël Gomès
|
r44828 | return VisitChildrenSet::Empty; | ||
} | ||||
Raphaël Gomès
|
r52002 | let compute_candidates = || -> BTreeSet<HgPathBuf> { | ||
let mut candidates: BTreeSet<HgPathBuf> = | ||||
self.dirs.iter().cloned().collect(); | ||||
candidates.extend(self.files.iter().cloned()); | ||||
candidates.remove(HgPath::new(b"")); | ||||
candidates | ||||
}; | ||||
let candidates = | ||||
if directory.as_ref().is_empty() { | ||||
compute_candidates() | ||||
} else { | ||||
let sorted_candidates = self | ||||
.sorted_visitchildrenset_candidates | ||||
.get_or_init(compute_candidates); | ||||
let directory_bytes = directory.as_ref().as_bytes(); | ||||
let start: HgPathBuf = | ||||
format_bytes!(b"{}/", directory_bytes).into(); | ||||
let start_len = start.len(); | ||||
// `0` sorts after `/` | ||||
let end = format_bytes!(b"{}0", directory_bytes).into(); | ||||
BTreeSet::from_iter(sorted_candidates.range(start..end).map( | ||||
|c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]), | ||||
)) | ||||
}; | ||||
Raphaël Gomès
|
r44828 | |||
// `self.dirs` includes all of the directories, recursively, so if | ||||
// we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo', | ||||
// 'foo/bar' in it. Thus we can safely ignore a candidate that has a | ||||
// '/' in it, indicating it's for a subdir-of-a-subdir; the immediate | ||||
// subdir will be in there without a slash. | ||||
VisitChildrenSet::Set( | ||||
candidates | ||||
Raphaël Gomès
|
r50241 | .into_iter() | ||
Raphaël Gomès
|
r44828 | .filter_map(|c| { | ||
if c.bytes().all(|b| *b != b'/') { | ||||
Raphaël Gomès
|
r50241 | Some(c) | ||
Raphaël Gomès
|
r44828 | } else { | ||
None | ||||
} | ||||
}) | ||||
.collect(), | ||||
) | ||||
Raphaël Gomès
|
r44366 | } | ||
fn matches_everything(&self) -> bool { | ||||
false | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
true | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45006 | |||
Spencer Baugh
|
r51758 | /// Matches a set of (kind, pat, source) against a 'root' directory. | ||
/// (Currently the 'root' directory is effectively always empty) | ||||
/// ``` | ||||
/// use hg::{ | ||||
/// matchers::{PatternMatcher, Matcher}, | ||||
/// IgnorePattern, | ||||
/// PatternSyntax, | ||||
/// utils::hg_path::{HgPath, HgPathBuf} | ||||
/// }; | ||||
/// use std::collections::HashSet; | ||||
/// use std::path::Path; | ||||
/// /// | ||||
/// let ignore_patterns : Vec<IgnorePattern> = | ||||
/// vec![IgnorePattern::new(PatternSyntax::Regexp, br".*\.c$", Path::new("")), | ||||
/// IgnorePattern::new(PatternSyntax::Path, b"foo/a", Path::new("")), | ||||
/// IgnorePattern::new(PatternSyntax::RelPath, b"b", Path::new("")), | ||||
/// IgnorePattern::new(PatternSyntax::Glob, b"*.h", Path::new("")), | ||||
/// ]; | ||||
/// let matcher = PatternMatcher::new(ignore_patterns).unwrap(); | ||||
/// /// | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true); // matches re:.*\.c$ | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"foo/a")), true); // matches path:foo/a | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"a")), false); // does not match path:b, since 'root' is 'foo' | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"b")), true); // matches relpath:b, since 'root' is 'foo' | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"lib.h")), true); // matches glob:*.h | ||||
/// assert_eq!(matcher.file_set().unwrap(), | ||||
/// &HashSet::from([HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"foo/a"), | ||||
/// HgPathBuf::from_bytes(b""), HgPathBuf::from_bytes(b"b")])); | ||||
/// assert_eq!(matcher.exact_match(HgPath::new(b"foo/a")), true); | ||||
/// assert_eq!(matcher.exact_match(HgPath::new(b"b")), true); | ||||
/// assert_eq!(matcher.exact_match(HgPath::new(b"lib.h")), false); // exact matches are for (rel)path kinds | ||||
/// ``` | ||||
pub struct PatternMatcher<'a> { | ||||
patterns: Vec<u8>, | ||||
match_fn: IgnoreFnType<'a>, | ||||
/// Whether all the patterns match a prefix (i.e. recursively) | ||||
prefix: bool, | ||||
files: HashSet<HgPathBuf>, | ||||
dirs: DirsMultiset, | ||||
} | ||||
impl core::fmt::Debug for PatternMatcher<'_> { | ||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||||
f.debug_struct("PatternMatcher") | ||||
.field("patterns", &String::from_utf8_lossy(&self.patterns)) | ||||
.field("prefix", &self.prefix) | ||||
.field("files", &self.files) | ||||
.field("dirs", &self.dirs) | ||||
.finish() | ||||
} | ||||
} | ||||
impl<'a> PatternMatcher<'a> { | ||||
pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> { | ||||
let (files, _) = roots_and_dirs(&ignore_patterns); | ||||
let dirs = DirsMultiset::from_manifest(&files)?; | ||||
Raphaël Gomès
|
r52013 | let files: HashSet<HgPathBuf> = HashSet::from_iter(files); | ||
Spencer Baugh
|
r51758 | |||
let prefix = ignore_patterns.iter().all(|k| { | ||||
matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath) | ||||
}); | ||||
let (patterns, match_fn) = build_match(ignore_patterns, b"$")?; | ||||
Ok(Self { | ||||
patterns, | ||||
match_fn, | ||||
prefix, | ||||
files, | ||||
dirs, | ||||
}) | ||||
} | ||||
} | ||||
impl<'a> Matcher for PatternMatcher<'a> { | ||||
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||||
Some(&self.files) | ||||
} | ||||
fn exact_match(&self, filename: &HgPath) -> bool { | ||||
self.files.contains(filename) | ||||
} | ||||
fn matches(&self, filename: &HgPath) -> bool { | ||||
if self.files.contains(filename) { | ||||
return true; | ||||
} | ||||
(self.match_fn)(filename) | ||||
} | ||||
fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { | ||||
if self.prefix && self.files.contains(directory) { | ||||
return VisitChildrenSet::Recursive; | ||||
} | ||||
let path_or_parents_in_set = find_dirs(directory) | ||||
.any(|parent_dir| self.files.contains(parent_dir)); | ||||
if self.dirs.contains(directory) || path_or_parents_in_set { | ||||
VisitChildrenSet::This | ||||
} else { | ||||
VisitChildrenSet::Empty | ||||
} | ||||
} | ||||
fn matches_everything(&self) -> bool { | ||||
false | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
false | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45009 | /// Matches files that are included in the ignore rules. | ||
Raphaël Gomès
|
r45406 | /// ``` | ||
/// use hg::{ | ||||
/// matchers::{IncludeMatcher, Matcher}, | ||||
/// IgnorePattern, | ||||
/// PatternSyntax, | ||||
/// utils::hg_path::HgPath | ||||
/// }; | ||||
/// use std::path::Path; | ||||
/// /// | ||||
/// let ignore_patterns = | ||||
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))]; | ||||
Simon Sapin
|
r48170 | /// let matcher = IncludeMatcher::new(ignore_patterns).unwrap(); | ||
Raphaël Gomès
|
r45406 | /// /// | ||
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"this also")), true); | ||||
/// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false); | ||||
/// ``` | ||||
Raphaël Gomès
|
r45009 | pub struct IncludeMatcher<'a> { | ||
patterns: Vec<u8>, | ||||
Arseniy Alekseyev
|
r49177 | match_fn: IgnoreFnType<'a>, | ||
Raphaël Gomès
|
r45009 | /// Whether all the patterns match a prefix (i.e. recursively) | ||
prefix: bool, | ||||
roots: HashSet<HgPathBuf>, | ||||
dirs: HashSet<HgPathBuf>, | ||||
parents: HashSet<HgPathBuf>, | ||||
} | ||||
Raphaël Gomès
|
r50381 | impl core::fmt::Debug for IncludeMatcher<'_> { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||||
f.debug_struct("IncludeMatcher") | ||||
.field("patterns", &String::from_utf8_lossy(&self.patterns)) | ||||
.field("prefix", &self.prefix) | ||||
.field("roots", &self.roots) | ||||
.field("dirs", &self.dirs) | ||||
.field("parents", &self.parents) | ||||
.finish() | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45009 | impl<'a> Matcher for IncludeMatcher<'a> { | ||
Raphaël Gomès
|
r50241 | fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||
Raphaël Gomès
|
r45009 | None | ||
} | ||||
Raphaël Gomès
|
r46182 | fn exact_match(&self, _filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r45009 | false | ||
} | ||||
Raphaël Gomès
|
r46182 | fn matches(&self, filename: &HgPath) -> bool { | ||
Raphaël Gomès
|
r50825 | (self.match_fn)(filename) | ||
Raphaël Gomès
|
r45009 | } | ||
Raphaël Gomès
|
r46182 | fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { | ||
Raphaël Gomès
|
r50825 | let dir = directory; | ||
Raphaël Gomès
|
r45009 | if self.prefix && self.roots.contains(dir) { | ||
return VisitChildrenSet::Recursive; | ||||
} | ||||
if self.roots.contains(HgPath::new(b"")) | ||||
|| self.roots.contains(dir) | ||||
|| self.dirs.contains(dir) | ||||
|| find_dirs(dir).any(|parent_dir| self.roots.contains(parent_dir)) | ||||
{ | ||||
return VisitChildrenSet::This; | ||||
} | ||||
Raphaël Gomès
|
r50825 | if self.parents.contains(dir.as_ref()) { | ||
Raphaël Gomès
|
r45009 | let multiset = self.get_all_parents_children(); | ||
if let Some(children) = multiset.get(dir) { | ||||
Raphaël Gomès
|
r50241 | return VisitChildrenSet::Set( | ||
Raphaël Gomès
|
r50825 | children.iter().map(HgPathBuf::from).collect(), | ||
Raphaël Gomès
|
r50241 | ); | ||
Raphaël Gomès
|
r45009 | } | ||
} | ||||
VisitChildrenSet::Empty | ||||
} | ||||
fn matches_everything(&self) -> bool { | ||||
false | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
false | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r50243 | /// The union of multiple matchers. Will match if any of the matchers match. | ||
Raphaël Gomès
|
r50381 | #[derive(Debug)] | ||
Raphaël Gomès
|
r50243 | pub struct UnionMatcher { | ||
matchers: Vec<Box<dyn Matcher + Sync>>, | ||||
} | ||||
impl Matcher for UnionMatcher { | ||||
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||||
None | ||||
} | ||||
fn exact_match(&self, _filename: &HgPath) -> bool { | ||||
false | ||||
} | ||||
fn matches(&self, filename: &HgPath) -> bool { | ||||
self.matchers.iter().any(|m| m.matches(filename)) | ||||
} | ||||
fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { | ||||
let mut result = HashSet::new(); | ||||
let mut this = false; | ||||
for matcher in self.matchers.iter() { | ||||
let visit = matcher.visit_children_set(directory); | ||||
match visit { | ||||
VisitChildrenSet::Empty => continue, | ||||
VisitChildrenSet::This => { | ||||
this = true; | ||||
// Don't break, we might have an 'all' in here. | ||||
continue; | ||||
} | ||||
VisitChildrenSet::Set(set) => { | ||||
result.extend(set); | ||||
} | ||||
VisitChildrenSet::Recursive => { | ||||
return visit; | ||||
} | ||||
} | ||||
} | ||||
if this { | ||||
return VisitChildrenSet::This; | ||||
} | ||||
if result.is_empty() { | ||||
VisitChildrenSet::Empty | ||||
} else { | ||||
VisitChildrenSet::Set(result) | ||||
} | ||||
} | ||||
fn matches_everything(&self) -> bool { | ||||
// TODO Maybe if all are AlwaysMatcher? | ||||
false | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
false | ||||
} | ||||
} | ||||
impl UnionMatcher { | ||||
pub fn new(matchers: Vec<Box<dyn Matcher + Sync>>) -> Self { | ||||
Self { matchers } | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r50381 | #[derive(Debug)] | ||
Raphaël Gomès
|
r50245 | pub struct IntersectionMatcher { | ||
m1: Box<dyn Matcher + Sync>, | ||||
m2: Box<dyn Matcher + Sync>, | ||||
files: Option<HashSet<HgPathBuf>>, | ||||
} | ||||
impl Matcher for IntersectionMatcher { | ||||
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||||
self.files.as_ref() | ||||
} | ||||
fn exact_match(&self, filename: &HgPath) -> bool { | ||||
self.files.as_ref().map_or(false, |f| f.contains(filename)) | ||||
} | ||||
fn matches(&self, filename: &HgPath) -> bool { | ||||
self.m1.matches(filename) && self.m2.matches(filename) | ||||
} | ||||
fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { | ||||
let m1_set = self.m1.visit_children_set(directory); | ||||
if m1_set == VisitChildrenSet::Empty { | ||||
return VisitChildrenSet::Empty; | ||||
} | ||||
let m2_set = self.m2.visit_children_set(directory); | ||||
if m2_set == VisitChildrenSet::Empty { | ||||
return VisitChildrenSet::Empty; | ||||
} | ||||
if m1_set == VisitChildrenSet::Recursive { | ||||
return m2_set; | ||||
} else if m2_set == VisitChildrenSet::Recursive { | ||||
return m1_set; | ||||
} | ||||
match (&m1_set, &m2_set) { | ||||
(VisitChildrenSet::Recursive, _) => m2_set, | ||||
(_, VisitChildrenSet::Recursive) => m1_set, | ||||
(VisitChildrenSet::This, _) | (_, VisitChildrenSet::This) => { | ||||
VisitChildrenSet::This | ||||
} | ||||
(VisitChildrenSet::Set(m1), VisitChildrenSet::Set(m2)) => { | ||||
Raphaël Gomès
|
r50825 | let set: HashSet<_> = m1.intersection(m2).cloned().collect(); | ||
Raphaël Gomès
|
r50245 | if set.is_empty() { | ||
VisitChildrenSet::Empty | ||||
} else { | ||||
VisitChildrenSet::Set(set) | ||||
} | ||||
} | ||||
_ => unreachable!(), | ||||
} | ||||
} | ||||
fn matches_everything(&self) -> bool { | ||||
self.m1.matches_everything() && self.m2.matches_everything() | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
self.m1.is_exact() || self.m2.is_exact() | ||||
} | ||||
} | ||||
impl IntersectionMatcher { | ||||
pub fn new( | ||||
mut m1: Box<dyn Matcher + Sync>, | ||||
mut m2: Box<dyn Matcher + Sync>, | ||||
) -> Self { | ||||
let files = if m1.is_exact() || m2.is_exact() { | ||||
if !m1.is_exact() { | ||||
std::mem::swap(&mut m1, &mut m2); | ||||
} | ||||
m1.file_set().map(|m1_files| { | ||||
m1_files.iter().cloned().filter(|f| m2.matches(f)).collect() | ||||
}) | ||||
} else { | ||||
Spencer Baugh
|
r51749 | // without exact input file sets, we can't do an exact | ||
// intersection, so we must over-approximate by | ||||
// unioning instead | ||||
m1.file_set().map(|m1_files| match m2.file_set() { | ||||
Some(m2_files) => m1_files.union(m2_files).cloned().collect(), | ||||
None => m1_files.iter().cloned().collect(), | ||||
}) | ||||
Raphaël Gomès
|
r50245 | }; | ||
Self { m1, m2, files } | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r50381 | #[derive(Debug)] | ||
Raphaël Gomès
|
r50373 | pub struct DifferenceMatcher { | ||
base: Box<dyn Matcher + Sync>, | ||||
excluded: Box<dyn Matcher + Sync>, | ||||
files: Option<HashSet<HgPathBuf>>, | ||||
} | ||||
impl Matcher for DifferenceMatcher { | ||||
fn file_set(&self) -> Option<&HashSet<HgPathBuf>> { | ||||
self.files.as_ref() | ||||
} | ||||
fn exact_match(&self, filename: &HgPath) -> bool { | ||||
self.files.as_ref().map_or(false, |f| f.contains(filename)) | ||||
} | ||||
fn matches(&self, filename: &HgPath) -> bool { | ||||
self.base.matches(filename) && !self.excluded.matches(filename) | ||||
} | ||||
fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { | ||||
let excluded_set = self.excluded.visit_children_set(directory); | ||||
if excluded_set == VisitChildrenSet::Recursive { | ||||
return VisitChildrenSet::Empty; | ||||
} | ||||
let base_set = self.base.visit_children_set(directory); | ||||
// Possible values for base: 'recursive', 'this', set(...), set() | ||||
// Possible values for excluded: 'this', set(...), set() | ||||
// If excluded has nothing under here that we care about, return base, | ||||
// even if it's 'recursive'. | ||||
if excluded_set == VisitChildrenSet::Empty { | ||||
return base_set; | ||||
} | ||||
match base_set { | ||||
VisitChildrenSet::This | VisitChildrenSet::Recursive => { | ||||
// Never return 'recursive' here if excluded_set is any kind of | ||||
// non-empty (either 'this' or set(foo)), since excluded might | ||||
// return set() for a subdirectory. | ||||
VisitChildrenSet::This | ||||
} | ||||
set => { | ||||
// Possible values for base: set(...), set() | ||||
// Possible values for excluded: 'this', set(...) | ||||
// We ignore excluded set results. They're possibly incorrect: | ||||
// base = path:dir/subdir | ||||
// excluded=rootfilesin:dir, | ||||
// visit_children_set(''): | ||||
// base returns {'dir'}, excluded returns {'dir'}, if we | ||||
// subtracted we'd return set(), which is *not* correct, we | ||||
// still need to visit 'dir'! | ||||
set | ||||
} | ||||
} | ||||
} | ||||
fn matches_everything(&self) -> bool { | ||||
false | ||||
} | ||||
fn is_exact(&self) -> bool { | ||||
self.base.is_exact() | ||||
} | ||||
} | ||||
impl DifferenceMatcher { | ||||
pub fn new( | ||||
base: Box<dyn Matcher + Sync>, | ||||
excluded: Box<dyn Matcher + Sync>, | ||||
) -> Self { | ||||
let base_is_exact = base.is_exact(); | ||||
let base_files = base.file_set().map(ToOwned::to_owned); | ||||
let mut new = Self { | ||||
base, | ||||
excluded, | ||||
files: None, | ||||
}; | ||||
if base_is_exact { | ||||
new.files = base_files.map(|files| { | ||||
files.iter().cloned().filter(|f| new.matches(f)).collect() | ||||
}); | ||||
} | ||||
new | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r50476 | /// Wraps [`regex::bytes::Regex`] to improve performance in multithreaded | ||
/// contexts. | ||||
/// | ||||
/// The `status` algorithm makes heavy use of threads, and calling `is_match` | ||||
/// from many threads at once is prone to contention, probably within the | ||||
/// scratch space needed as the regex DFA is built lazily. | ||||
/// | ||||
/// We are in the process of raising the issue upstream, but for now | ||||
/// the workaround used here is to store the `Regex` in a lazily populated | ||||
/// thread-local variable, sharing the initial read-only compilation, but | ||||
/// not the lazy dfa scratch space mentioned above. | ||||
/// | ||||
/// This reduces the contention observed with 16+ threads, but does not | ||||
/// completely remove it. Hopefully this can be addressed upstream. | ||||
struct RegexMatcher { | ||||
/// Compiled at the start of the status algorithm, used as a base for | ||||
/// cloning in each thread-local `self.local`, thus sharing the expensive | ||||
/// first compilation. | ||||
base: regex::bytes::Regex, | ||||
/// Thread-local variable that holds the `Regex` that is actually queried | ||||
/// from each thread. | ||||
local: thread_local::ThreadLocal<regex::bytes::Regex>, | ||||
} | ||||
impl RegexMatcher { | ||||
/// Returns whether the path matches the stored `Regex`. | ||||
pub fn is_match(&self, path: &HgPath) -> bool { | ||||
self.local | ||||
.get_or(|| self.base.clone()) | ||||
.is_match(path.as_bytes()) | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45084 | /// Returns a function that matches an `HgPath` against the given regex | ||
/// pattern. | ||||
/// | ||||
/// This can fail when the pattern is invalid or not supported by the | ||||
/// underlying engine (the `regex` crate), for instance anything with | ||||
/// back-references. | ||||
Raphaël Gomès
|
r50808 | #[logging_timer::time("trace")] | ||
Raphaël Gomès
|
r50476 | fn re_matcher(pattern: &[u8]) -> PatternResult<RegexMatcher> { | ||
Raphaël Gomès
|
r45084 | use std::io::Write; | ||
Raphaël Gomès
|
r45347 | // The `regex` crate adds `.*` to the start and end of expressions if there | ||
// are no anchors, so add the start anchor. | ||||
let mut escaped_bytes = vec![b'^', b'(', b'?', b':']; | ||||
Raphaël Gomès
|
r45084 | for byte in pattern { | ||
if *byte > 127 { | ||||
write!(escaped_bytes, "\\x{:x}", *byte).unwrap(); | ||||
} else { | ||||
escaped_bytes.push(*byte); | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45347 | escaped_bytes.push(b')'); | ||
Raphaël Gomès
|
r45084 | |||
// Avoid the cost of UTF8 checking | ||||
// | ||||
// # Safety | ||||
// This is safe because we escaped all non-ASCII bytes. | ||||
let pattern_string = unsafe { String::from_utf8_unchecked(escaped_bytes) }; | ||||
let re = regex::bytes::RegexBuilder::new(&pattern_string) | ||||
.unicode(false) | ||||
Raphaël Gomès
|
r45286 | // Big repos with big `.hgignore` will hit the default limit and | ||
// incur a significant performance hit. One repo's `hg status` hit | ||||
// multiple *minutes*. | ||||
.dfa_size_limit(50 * (1 << 20)) | ||||
Raphaël Gomès
|
r45084 | .build() | ||
.map_err(|e| PatternError::UnsupportedSyntax(e.to_string()))?; | ||||
Raphaël Gomès
|
r50476 | Ok(RegexMatcher { | ||
base: re, | ||||
local: Default::default(), | ||||
}) | ||||
Raphaël Gomès
|
r45006 | } | ||
Raphaël Gomès
|
r45008 | /// Returns the regex pattern and a function that matches an `HgPath` against | ||
/// said regex formed by the given ignore patterns. | ||||
Raphaël Gomès
|
r52013 | fn build_regex_match<'a>( | ||
ignore_patterns: &[IgnorePattern], | ||||
Spencer Baugh
|
r51754 | glob_suffix: &[u8], | ||
Raphaël Gomès
|
r52013 | ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { | ||
Raphaël Gomès
|
r45311 | let mut regexps = vec![]; | ||
let mut exact_set = HashSet::new(); | ||||
for pattern in ignore_patterns { | ||||
Spencer Baugh
|
r51754 | if let Some(re) = build_single_regex(pattern, glob_suffix)? { | ||
Raphaël Gomès
|
r45311 | regexps.push(re); | ||
} else { | ||||
let exact = normalize_path_bytes(&pattern.pattern); | ||||
exact_set.insert(HgPathBuf::from_bytes(&exact)); | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r45008 | let full_regex = regexps.join(&b'|'); | ||
Raphaël Gomès
|
r45311 | // An empty pattern would cause the regex engine to incorrectly match the | ||
// (empty) root directory | ||||
let func = if !(regexps.is_empty()) { | ||||
let matcher = re_matcher(&full_regex)?; | ||||
let func = move |filename: &HgPath| { | ||||
Raphaël Gomès
|
r50476 | exact_set.contains(filename) || matcher.is_match(filename) | ||
Raphaël Gomès
|
r45311 | }; | ||
Arseniy Alekseyev
|
r49177 | Box::new(func) as IgnoreFnType | ||
Raphaël Gomès
|
r45311 | } else { | ||
let func = move |filename: &HgPath| exact_set.contains(filename); | ||||
Arseniy Alekseyev
|
r49177 | Box::new(func) as IgnoreFnType | ||
Raphaël Gomès
|
r45311 | }; | ||
Raphaël Gomès
|
r45008 | |||
Ok((full_regex, func)) | ||||
} | ||||
Raphaël Gomès
|
r45007 | /// Returns roots and directories corresponding to each pattern. | ||
/// | ||||
/// This calculates the roots and directories exactly matching the patterns and | ||||
/// returns a tuple of (roots, dirs). It does not return other directories | ||||
/// which may also need to be considered, like the parent directories. | ||||
fn roots_and_dirs( | ||||
ignore_patterns: &[IgnorePattern], | ||||
) -> (Vec<HgPathBuf>, Vec<HgPathBuf>) { | ||||
let mut roots = Vec::new(); | ||||
let mut dirs = Vec::new(); | ||||
for ignore_pattern in ignore_patterns { | ||||
let IgnorePattern { | ||||
syntax, pattern, .. | ||||
} = ignore_pattern; | ||||
match syntax { | ||||
PatternSyntax::RootGlob | PatternSyntax::Glob => { | ||||
Arseniy Alekseyev
|
r49132 | let mut root = HgPathBuf::new(); | ||
Raphaël Gomès
|
r45007 | for p in pattern.split(|c| *c == b'/') { | ||
Raphaël Gomès
|
r50825 | if p.iter() | ||
.any(|c| matches!(*c, b'[' | b'{' | b'*' | b'?')) | ||||
{ | ||||
Raphaël Gomès
|
r45007 | break; | ||
} | ||||
Arseniy Alekseyev
|
r49132 | root.push(HgPathBuf::from_bytes(p).as_ref()); | ||
Raphaël Gomès
|
r45007 | } | ||
Arseniy Alekseyev
|
r49132 | roots.push(root); | ||
Raphaël Gomès
|
r45007 | } | ||
Raphaël Gomès
|
r51588 | PatternSyntax::Path | ||
| PatternSyntax::RelPath | ||||
| PatternSyntax::FilePath => { | ||||
Raphaël Gomès
|
r45007 | let pat = HgPath::new(if pattern == b"." { | ||
&[] as &[u8] | ||||
} else { | ||||
pattern | ||||
}); | ||||
roots.push(pat.to_owned()); | ||||
} | ||||
PatternSyntax::RootFiles => { | ||||
let pat = if pattern == b"." { | ||||
&[] as &[u8] | ||||
} else { | ||||
pattern | ||||
}; | ||||
dirs.push(HgPathBuf::from_bytes(pat)); | ||||
} | ||||
_ => { | ||||
roots.push(HgPathBuf::new()); | ||||
} | ||||
} | ||||
} | ||||
(roots, dirs) | ||||
} | ||||
/// Paths extracted from patterns | ||||
#[derive(Debug, PartialEq)] | ||||
struct RootsDirsAndParents { | ||||
/// Directories to match recursively | ||||
pub roots: HashSet<HgPathBuf>, | ||||
/// Directories to match non-recursively | ||||
pub dirs: HashSet<HgPathBuf>, | ||||
/// Implicitly required directories to go to items in either roots or dirs | ||||
pub parents: HashSet<HgPathBuf>, | ||||
} | ||||
/// Extract roots, dirs and parents from patterns. | ||||
fn roots_dirs_and_parents( | ||||
ignore_patterns: &[IgnorePattern], | ||||
) -> PatternResult<RootsDirsAndParents> { | ||||
let (roots, dirs) = roots_and_dirs(ignore_patterns); | ||||
let mut parents = HashSet::new(); | ||||
parents.extend( | ||||
Spencer Baugh
|
r51753 | DirsMultiset::from_manifest(&dirs)? | ||
Raphaël Gomès
|
r45007 | .iter() | ||
Raphaël Gomès
|
r45500 | .map(ToOwned::to_owned), | ||
Raphaël Gomès
|
r45007 | ); | ||
parents.extend( | ||||
Spencer Baugh
|
r51753 | DirsMultiset::from_manifest(&roots)? | ||
Raphaël Gomès
|
r45007 | .iter() | ||
Raphaël Gomès
|
r45500 | .map(ToOwned::to_owned), | ||
Raphaël Gomès
|
r45007 | ); | ||
Ok(RootsDirsAndParents { | ||||
roots: HashSet::from_iter(roots), | ||||
dirs: HashSet::from_iter(dirs), | ||||
parents, | ||||
}) | ||||
} | ||||
Raphaël Gomès
|
r45009 | /// Returns a function that checks whether a given file (in the general sense) | ||
/// should be matched. | ||||
Raphaël Gomès
|
r50825 | fn build_match<'a>( | ||
Simon Sapin
|
r48170 | ignore_patterns: Vec<IgnorePattern>, | ||
Spencer Baugh
|
r51754 | glob_suffix: &[u8], | ||
Raphaël Gomès
|
r50825 | ) -> PatternResult<(Vec<u8>, IgnoreFnType<'a>)> { | ||
let mut match_funcs: Vec<IgnoreFnType<'a>> = vec![]; | ||||
Raphaël Gomès
|
r45009 | // For debugging and printing | ||
let mut patterns = vec![]; | ||||
Simon Sapin
|
r48170 | let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?; | ||
Raphaël Gomès
|
r45009 | |||
if !subincludes.is_empty() { | ||||
// Build prefix-based matcher functions for subincludes | ||||
let mut submatchers = FastHashMap::default(); | ||||
let mut prefixes = vec![]; | ||||
Simon Sapin
|
r48170 | for sub_include in subincludes { | ||
let matcher = IncludeMatcher::new(sub_include.included_patterns)?; | ||||
let match_fn = | ||||
Box::new(move |path: &HgPath| matcher.matches(path)); | ||||
prefixes.push(sub_include.prefix.clone()); | ||||
submatchers.insert(sub_include.prefix.clone(), match_fn); | ||||
Raphaël Gomès
|
r45009 | } | ||
let match_subinclude = move |filename: &HgPath| { | ||||
for prefix in prefixes.iter() { | ||||
if let Some(rel) = filename.relative_to(prefix) { | ||||
Raphaël Gomès
|
r45500 | if (submatchers[prefix])(rel) { | ||
Raphaël Gomès
|
r45009 | return true; | ||
} | ||||
} | ||||
} | ||||
false | ||||
}; | ||||
match_funcs.push(Box::new(match_subinclude)); | ||||
} | ||||
if !ignore_patterns.is_empty() { | ||||
// Either do dumb matching if all patterns are rootfiles, or match | ||||
// with a regex. | ||||
if ignore_patterns | ||||
.iter() | ||||
.all(|k| k.syntax == PatternSyntax::RootFiles) | ||||
{ | ||||
let dirs: HashSet<_> = ignore_patterns | ||||
.iter() | ||||
.map(|k| k.pattern.to_owned()) | ||||
.collect(); | ||||
let mut dirs_vec: Vec<_> = dirs.iter().cloned().collect(); | ||||
let match_func = move |path: &HgPath| -> bool { | ||||
let path = path.as_bytes(); | ||||
let i = path.iter().rfind(|a| **a == b'/'); | ||||
let dir = if let Some(i) = i { | ||||
&path[..*i as usize] | ||||
} else { | ||||
b"." | ||||
}; | ||||
Raphaël Gomès
|
r52013 | dirs.contains(dir) | ||
Raphaël Gomès
|
r45009 | }; | ||
match_funcs.push(Box::new(match_func)); | ||||
patterns.extend(b"rootfilesin: "); | ||||
dirs_vec.sort(); | ||||
patterns.extend(dirs_vec.escaped_bytes()); | ||||
} else { | ||||
Spencer Baugh
|
r51754 | let (new_re, match_func) = | ||
build_regex_match(&ignore_patterns, glob_suffix)?; | ||||
Raphaël Gomès
|
r45009 | patterns = new_re; | ||
match_funcs.push(match_func) | ||||
} | ||||
} | ||||
Ok(if match_funcs.len() == 1 { | ||||
Simon Sapin
|
r48170 | (patterns, match_funcs.remove(0)) | ||
Raphaël Gomès
|
r45009 | } else { | ||
( | ||||
patterns, | ||||
Box::new(move |f: &HgPath| -> bool { | ||||
match_funcs.iter().any(|match_func| match_func(f)) | ||||
}), | ||||
) | ||||
}) | ||||
} | ||||
/// Parses all "ignore" files with their recursive includes and returns a | ||||
/// function that checks whether a given file (in the general sense) should be | ||||
/// ignored. | ||||
Arseniy Alekseyev
|
r49178 | pub fn get_ignore_matcher<'a>( | ||
Simon Sapin
|
r48202 | mut all_pattern_files: Vec<PathBuf>, | ||
Simon Sapin
|
r48169 | root_dir: &Path, | ||
Raphaël Gomès
|
r50453 | inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), | ||
Arseniy Alekseyev
|
r49178 | ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> { | ||
Raphaël Gomès
|
r45009 | let mut all_patterns = vec![]; | ||
let mut all_warnings = vec![]; | ||||
Simon Sapin
|
r48202 | // Sort to make the ordering of calls to `inspect_pattern_bytes` | ||
// deterministic even if the ordering of `all_pattern_files` is not (such | ||||
// as when a iteration order of a Python dict or Rust HashMap is involved). | ||||
// Sort by "string" representation instead of the default by component | ||||
// (with a Rust-specific definition of a component) | ||||
all_pattern_files | ||||
.sort_unstable_by(|a, b| a.as_os_str().cmp(b.as_os_str())); | ||||
Simon Sapin
|
r48169 | for pattern_file in &all_pattern_files { | ||
Simon Sapin
|
r48202 | let (patterns, warnings) = get_patterns_from_file( | ||
pattern_file, | ||||
root_dir, | ||||
inspect_pattern_bytes, | ||||
)?; | ||||
Raphaël Gomès
|
r45009 | |||
Raphaël Gomès
|
r45088 | all_patterns.extend(patterns.to_owned()); | ||
Raphaël Gomès
|
r45009 | all_warnings.extend(warnings); | ||
} | ||||
Simon Sapin
|
r48170 | let matcher = IncludeMatcher::new(all_patterns)?; | ||
Arseniy Alekseyev
|
r49178 | Ok((matcher, all_warnings)) | ||
} | ||||
/// Parses all "ignore" files with their recursive includes and returns a | ||||
/// function that checks whether a given file (in the general sense) should be | ||||
/// ignored. | ||||
pub fn get_ignore_function<'a>( | ||||
all_pattern_files: Vec<PathBuf>, | ||||
root_dir: &Path, | ||||
Raphaël Gomès
|
r50453 | inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), | ||
Arseniy Alekseyev
|
r49178 | ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> { | ||
let res = | ||||
get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes); | ||||
res.map(|(matcher, all_warnings)| { | ||||
let res: IgnoreFnType<'a> = | ||||
Box::new(move |path: &HgPath| matcher.matches(path)); | ||||
(res, all_warnings) | ||||
}) | ||||
Raphaël Gomès
|
r45009 | } | ||
impl<'a> IncludeMatcher<'a> { | ||||
Simon Sapin
|
r48170 | pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> { | ||
Raphaël Gomès
|
r45009 | let RootsDirsAndParents { | ||
roots, | ||||
dirs, | ||||
parents, | ||||
} = roots_dirs_and_parents(&ignore_patterns)?; | ||||
Raphaël Gomès
|
r50825 | let prefix = ignore_patterns.iter().all(|k| { | ||
matches!(k.syntax, PatternSyntax::Path | PatternSyntax::RelPath) | ||||
Raphaël Gomès
|
r45009 | }); | ||
Spencer Baugh
|
r51754 | let (patterns, match_fn) = build_match(ignore_patterns, b"(?:/|$)")?; | ||
Raphaël Gomès
|
r45009 | |||
Simon Sapin
|
r48170 | Ok(Self { | ||
patterns, | ||||
match_fn, | ||||
prefix, | ||||
roots, | ||||
dirs, | ||||
parents, | ||||
}) | ||||
Raphaël Gomès
|
r45009 | } | ||
fn get_all_parents_children(&self) -> DirsChildrenMultiset { | ||||
// TODO cache | ||||
let thing = self | ||||
.dirs | ||||
.iter() | ||||
.chain(self.roots.iter()) | ||||
.chain(self.parents.iter()); | ||||
DirsChildrenMultiset::new(thing, Some(&self.parents)) | ||||
} | ||||
Arseniy Alekseyev
|
r49178 | |||
pub fn debug_get_patterns(&self) -> &[u8] { | ||||
self.patterns.as_ref() | ||||
} | ||||
Raphaël Gomès
|
r45009 | } | ||
impl<'a> Display for IncludeMatcher<'a> { | ||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { | ||||
Raphaël Gomès
|
r45312 | // XXX What about exact matches? | ||
// I'm not sure it's worth it to clone the HashSet and keep it | ||||
// around just in case someone wants to display the matcher, plus | ||||
// it's going to be unreadable after a few entries, but we need to | ||||
// inform in this display that exact matches are being used and are | ||||
// (on purpose) missing from the `includes`. | ||||
Raphaël Gomès
|
r45009 | write!( | ||
f, | ||||
"IncludeMatcher(includes='{}')", | ||||
String::from_utf8_lossy(&self.patterns.escaped_bytes()) | ||||
) | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r44828 | #[cfg(test)] | ||
mod tests { | ||||
use super::*; | ||||
use pretty_assertions::assert_eq; | ||||
Raphaël Gomès
|
r45007 | use std::path::Path; | ||
#[test]
fn test_roots_and_dirs() {
    // All patterns are globs coming from the same (empty) source path.
    let glob = |pattern: &[u8]| {
        IgnorePattern::new(PatternSyntax::Glob, pattern, Path::new(""))
    };
    let pats = [glob(b"g/h/*"), glob(b"g/h"), glob(b"g*")];

    let (roots, dirs) = roots_and_dirs(&pats);

    // One root per pattern: the components in front of the first one that
    // contains a glob character.
    let expected_roots = vec![
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::from_bytes(b"g/h"),
        HgPathBuf::new(),
    ];
    assert_eq!(roots, expected_roots);
    assert_eq!(dirs, Vec::<HgPathBuf>::new());
}
#[test]
fn test_roots_dirs_and_parents() {
    // All patterns are globs coming from the same (empty) source path.
    let glob = |pattern: &[u8]| {
        IgnorePattern::new(PatternSyntax::Glob, pattern, Path::new(""))
    };
    let pats = [glob(b"g/h/*"), glob(b"g/h"), glob(b"g*")];

    let expected = RootsDirsAndParents {
        roots: HashSet::from([
            HgPathBuf::from_bytes(b"g/h"),
            HgPathBuf::new(),
        ]),
        dirs: HashSet::new(),
        parents: HashSet::from([
            HgPathBuf::new(),
            HgPathBuf::from_bytes(b"g"),
        ]),
    };

    assert_eq!(roots_dirs_and_parents(&pats).unwrap(), expected);
}
Raphaël Gomès
|
r44828 | |||
#[test]
fn test_filematcher_visit_children_set() {
    /// Wraps the given child names in a `VisitChildrenSet::Set`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Visitchildrenset
    let matcher = FileMatcher::new(vec![HgPathBuf::from_bytes(
        b"dir/subdir/foo.txt",
    )])
    .unwrap();
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    // Each directory on the way to the file lists the next component.
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"dir"), set_of(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), set_of(&[b"foo.txt"]));

    // Anything else has nothing to visit.
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/foo.txt"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
}
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
    /// Wraps the given child names in a `VisitChildrenSet::Set`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    let matcher = FileMatcher::new(vec![
        HgPathBuf::from_bytes(b"rootfile.txt"),
        HgPathBuf::from_bytes(b"a/file1.txt"),
        HgPathBuf::from_bytes(b"a/b/file2.txt"),
        // No file in a/b/c
        HgPathBuf::from_bytes(b"a/b/c/d/file4.txt"),
    ])
    .unwrap();
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));

    // Directories holding both files and sub-directories list both.
    assert_eq!(visit(b""), set_of(&[b"a", b"rootfile.txt"]));
    assert_eq!(visit(b"a"), set_of(&[b"b", b"file1.txt"]));
    assert_eq!(visit(b"a/b"), set_of(&[b"c", b"file2.txt"]));
    assert_eq!(visit(b"a/b/c"), set_of(&[b"d"]));
    assert_eq!(visit(b"a/b/c/d"), set_of(&[b"file4.txt"]));

    // Anything else has nothing to visit.
    assert_eq!(visit(b"a/b/c/d/e"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
}
Raphaël Gomès
|
r45009 | |||
#[test]
fn test_patternmatcher() {
    // Builds a matcher holding one pattern of the given syntax.
    let single = |syntax: PatternSyntax, pattern: &[u8]| {
        PatternMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    };

    // VisitdirPrefix
    let m = single(PatternSyntax::Path, b"dir/subdir");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    // OPT: This should probably be Recursive if its parent is?
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // VisitchildrensetPrefix
    let m = single(PatternSyntax::Path, b"dir/subdir");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    // OPT: This should probably be Recursive if its parent is?
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // VisitdirRootfilesin
    let m = single(PatternSyntax::RootFiles, b"dir/subdir");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    // FIXME: These should probably be This.
    assert_eq!(visit(b""), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);

    // VisitchildrensetRootfilesin
    let m = single(PatternSyntax::RootFiles, b"dir/subdir");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    // FIXME: These should probably be {'dir'}, {'subdir'} and This,
    // respectively, or at least This for all three.
    assert_eq!(visit(b""), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);

    // VisitdirGlob
    let m = single(PatternSyntax::Glob, b"dir/z*");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
    // FIXME: This probably should be This
    assert_eq!(visit(b"dir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    // OPT: these should probably be Empty
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // VisitchildrensetGlob
    let m = single(PatternSyntax::Glob, b"dir/z*");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    // FIXME: This probably should be This
    assert_eq!(visit(b"dir"), VisitChildrenSet::Empty);
    // OPT: these should probably be Empty
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // VisitdirFilepath
    let m = single(PatternSyntax::FilePath, b"dir/z");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);

    // VisitchildrensetFilepath
    let m = single(PatternSyntax::FilePath, b"dir/z");
    let visit = |dir: &[u8]| m.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
}
#[test]
fn test_includematcher() {
    /// Builds an include matcher holding one pattern of the given syntax.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        IncludeMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    }

    /// Wraps the given child names in a `VisitChildrenSet::Set`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // VisitchildrensetPrefix
    let matcher = include(PatternSyntax::RelPath, b"dir/subdir");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"dir"), set_of(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    // OPT: This should probably be 'all' if its parent is?
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // VisitchildrensetRootfilesin
    let matcher = include(PatternSyntax::RootFiles, b"dir/subdir");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"dir"), set_of(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);

    // VisitchildrensetGlob
    let matcher = include(PatternSyntax::Glob, b"dir/z*");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    // OPT: these should probably be set().
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // VisitchildrensetFilePath
    let matcher = include(PatternSyntax::FilePath, b"dir/z");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir"), set_of(&[b"z"]));
    // OPT: these should probably be set().
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Empty);

    // Test multiple patterns
    let matcher = IncludeMatcher::new(vec![
        IgnorePattern::new(PatternSyntax::RelPath, b"foo", Path::new("")),
        IgnorePattern::new(PatternSyntax::Glob, b"g*", Path::new("")),
    ])
    .unwrap();
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);

    // Test a single glob whose first component is already a wildcard
    let matcher = include(PatternSyntax::Glob, b"**/*.exe");
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), VisitChildrenSet::This);
}
Raphaël Gomès
|
r50243 | |||
#[test]
fn test_unionmatcher() {
    /// Builds an include matcher holding one pattern of the given syntax.
    fn include(
        syntax: PatternSyntax,
        pattern: &[u8],
    ) -> IncludeMatcher<'static> {
        IncludeMatcher::new(vec![IgnorePattern::new(
            syntax,
            pattern,
            Path::new(""),
        )])
        .unwrap()
    }

    /// Wraps the given child names in a `VisitChildrenSet::Set`.
    fn set_of(names: &[&[u8]]) -> VisitChildrenSet {
        VisitChildrenSet::Set(
            names
                .iter()
                .map(|name| HgPathBuf::from_bytes(name))
                .collect(),
        )
    }

    // Path + Rootfiles
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RootFiles, b"dir");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"dir"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    // OPT: These next two could be 'all' instead of 'this'.
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // Path + unrelated Path
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir");
    let m2 = include(PatternSyntax::RelPath, b"folder");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"folder", b"dir"]));
    assert_eq!(visit(b"dir"), set_of(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Recursive);
    // OPT: These next two could be 'all' instead of 'this'.
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::This);

    // Path + subpath
    let m1 = include(PatternSyntax::RelPath, b"dir/subdir/x");
    let m2 = include(PatternSyntax::RelPath, b"dir/subdir");
    let matcher = UnionMatcher::new(vec![Box::new(m1), Box::new(m2)]);
    let visit = |dir: &[u8]| matcher.visit_children_set(HgPath::new(dir));
    assert_eq!(visit(b""), set_of(&[b"dir"]));
    assert_eq!(visit(b"dir"), set_of(&[b"subdir"]));
    assert_eq!(visit(b"dir/subdir"), VisitChildrenSet::Recursive);
    assert_eq!(visit(b"dir/foo"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"folder"), VisitChildrenSet::Empty);
    assert_eq!(visit(b"dir/subdir/x"), VisitChildrenSet::Recursive);
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(visit(b"dir/subdir/z"), VisitChildrenSet::This);
}
Raphaël Gomès
|
r50245 | |||
#[test]
fn test_intersectionmatcher() {
    // Builds a boxed single-pattern `IncludeMatcher` whose pattern source
    // is the empty path.
    let include = |syntax: PatternSyntax, pattern: &[u8]| {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new(""),
            )])
            .unwrap(),
        )
    };
    // Expected result when exactly one child directory has to be visited.
    let only_child = |name: &[u8]| {
        let mut children = HashSet::new();
        children.insert(HgPathBuf::from_bytes(name));
        VisitChildrenSet::Set(children)
    };

    // Scenario 1: include path + include rootfiles
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir"),
        include(PatternSyntax::RootFiles, b"dir"),
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        only_child(b"dir")
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        VisitChildrenSet::This
    );
    for path in &[
        &b"dir/subdir"[..],
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }

    // Scenario 2: non intersecting paths, nothing is ever visited
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir"),
        include(PatternSyntax::RelPath, b"folder"),
    );
    for path in &[
        &b""[..],
        b"dir",
        b"dir/subdir",
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }

    // Scenario 3: nested paths
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir/x"),
        include(PatternSyntax::RelPath, b"dir/subdir"),
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        only_child(b"dir")
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        only_child(b"subdir")
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        only_child(b"x")
    );
    for path in &[&b"dir/foo"[..], b"folder", b"dir/subdir/z"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
    // OPT: this should probably be 'all' not 'this'.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
        VisitChildrenSet::This
    );

    // Scenario 4: diverging paths
    let matcher = IntersectionMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir/x"),
        include(PatternSyntax::RelPath, b"dir/subdir/z"),
    );
    // OPT: the results for "" and "dir" could probably be Empty as well.
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        only_child(b"dir")
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        only_child(b"subdir")
    );
    for path in &[
        &b"dir/subdir"[..],
        b"dir/foo",
        b"folder",
        b"dir/subdir/z",
        b"dir/subdir/x",
    ] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
}
Raphaël Gomès
|
r50373 | |||
#[test]
fn test_differencematcher() {
    // Directories probed by the two always/never scenarios below.
    let probes: [&[u8]; 7] = [
        b"",
        b"dir",
        b"dir/subdir",
        b"dir/subdir/z",
        b"dir/foo",
        b"dir/subdir/x",
        b"folder",
    ];

    // Subtracting an alwaysmatcher from an alwaysmatcher should function
    // like a nevermatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(AlwaysMatcher),
    );
    for probe in probes.iter() {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(probe)),
            VisitChildrenSet::Empty
        );
    }

    // Subtracting a nevermatcher from an alwaysmatcher should behave the
    // same as an alwaysmatcher
    let matcher = DifferenceMatcher::new(
        Box::new(AlwaysMatcher),
        Box::new(NeverMatcher),
    );
    for probe in probes.iter() {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(probe)),
            VisitChildrenSet::Recursive
        );
    }

    // Two include matchers, both built with "/repo" as the pattern source
    let include = |syntax: PatternSyntax, pattern: &[u8]| {
        Box::new(
            IncludeMatcher::new(vec![IgnorePattern::new(
                syntax,
                pattern,
                Path::new("/repo"),
            )])
            .unwrap(),
        )
    };
    // Expected result when exactly one child directory has to be visited.
    let only_child = |name: &[u8]| {
        let mut children = HashSet::new();
        children.insert(HgPathBuf::from_bytes(name));
        VisitChildrenSet::Set(children)
    };
    let matcher = DifferenceMatcher::new(
        include(PatternSyntax::RelPath, b"dir/subdir"),
        include(PatternSyntax::RootFiles, b"dir"),
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"")),
        only_child(b"dir")
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir")),
        only_child(b"subdir")
    );
    assert_eq!(
        matcher.visit_children_set(HgPath::new(b"dir/subdir")),
        VisitChildrenSet::Recursive
    );
    for path in &[&b"dir/foo"[..], b"folder"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::Empty
        );
    }
    for path in &[&b"dir/subdir/z"[..], b"dir/subdir/x"] {
        assert_eq!(
            matcher.visit_children_set(HgPath::new(path)),
            VisitChildrenSet::This
        );
    }
}
Raphaël Gomès
|
r44828 | } | ||