filepatterns.rs
913 lines
| 27.8 KiB
| application/rls-services+xml
|
RustLexer
Raphaël Gomès
|
r42996 | // filepatterns.rs | ||
// | ||||
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net> | ||||
// | ||||
// This software may be used and distributed according to the terms of the | ||||
// GNU General Public License version 2 or any later version. | ||||
//! Handling of Mercurial-specific patterns. | ||||
Raphaël Gomès
|
r44785 | use crate::{ | ||
utils::{ | ||||
files::{canonical_path, get_bytes_from_path, get_path_from_bytes}, | ||||
hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError}, | ||||
SliceExt, | ||||
}, | ||||
Raphaël Gomès
|
r53199 | FastHashMap, | ||
Raphaël Gomès
|
r44785 | }; | ||
Raphaël Gomès
|
r42828 | use lazy_static::lazy_static; | ||
Yuya Nishihara
|
r42859 | use regex::bytes::{NoExpand, Regex}; | ||
Raphaël Gomès
|
r43227 | use std::path::{Path, PathBuf}; | ||
Raphaël Gomès
|
r42514 | use std::vec::Vec; | ||
Raphaël Gomès
|
r53199 | use std::{fmt, ops::Deref}; | ||
#[derive(Debug, derive_more::From)] | ||||
pub enum PatternError { | ||||
#[from] | ||||
Path(HgPathError), | ||||
UnsupportedSyntax(String), | ||||
UnsupportedSyntaxInFile(String, String, usize), | ||||
TooLong(usize), | ||||
#[from] | ||||
IO(std::io::Error), | ||||
/// Needed a pattern that can be turned into a regex but got one that | ||||
/// can't. This should only happen through programmer error. | ||||
NonRegexPattern(IgnorePattern), | ||||
} | ||||
impl fmt::Display for PatternError { | ||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
match self { | ||||
PatternError::UnsupportedSyntax(syntax) => { | ||||
write!(f, "Unsupported syntax {}", syntax) | ||||
} | ||||
PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => { | ||||
write!( | ||||
f, | ||||
"{}:{}: unsupported syntax {}", | ||||
file_path, line, syntax | ||||
) | ||||
} | ||||
PatternError::TooLong(size) => { | ||||
write!(f, "matcher pattern is too long ({} bytes)", size) | ||||
} | ||||
PatternError::IO(error) => error.fmt(f), | ||||
PatternError::Path(error) => error.fmt(f), | ||||
PatternError::NonRegexPattern(pattern) => { | ||||
write!(f, "'{:?}' cannot be turned into a regex", pattern) | ||||
} | ||||
} | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r42514 | |||
lazy_static! { | ||||
Yuya Nishihara
|
r42683 | static ref RE_ESCAPE: Vec<Vec<u8>> = { | ||
Raphaël Gomès
|
r42514 | let mut v: Vec<Vec<u8>> = (0..=255).map(|byte| vec![byte]).collect(); | ||
Spencer Baugh
|
r51752 | let to_escape = b"()[]{}?*+-|^$\\.&~#\t\n\r\x0b\x0c"; | ||
Raphaël Gomès
|
r42514 | for byte in to_escape { | ||
v[*byte as usize].insert(0, b'\\'); | ||||
} | ||||
v | ||||
}; | ||||
} | ||||
/// Regex replacements for a glob `*`, as `(bytes following the star, regex)`.
///
/// These are matched in order, so the more specific prefixes come first; the
/// last entry has an empty prefix and therefore always matches.
const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] = &[
    // `**/`: any number of leading directories (possibly none)
    (b"*/", b"(?:.*/)?"),
    // `**`: anything, slashes included
    (b"*", b".*"),
    // `*`: anything within a single path component
    (b"", b"[^/]*"),
];
Simon Sapin
|
r48170 | #[derive(Debug, Clone, PartialEq, Eq)] | ||
Raphaël Gomès
|
r42514 | pub enum PatternSyntax { | ||
Raphaël Gomès
|
r44784 | /// A regular expression | ||
Raphaël Gomès
|
r42514 | Regexp, | ||
/// Glob that matches at the front of the path | ||||
RootGlob, | ||||
Yuya Nishihara
|
r43109 | /// Glob that matches at any suffix of the path (still anchored at | ||
/// slashes) | ||||
Raphaël Gomès
|
r42514 | Glob, | ||
Raphaël Gomès
|
r44784 | /// a path relative to repository root, which is matched recursively | ||
Raphaël Gomès
|
r42514 | Path, | ||
Raphaël Gomès
|
r51588 | /// a single exact path relative to repository root | ||
FilePath, | ||||
Raphaël Gomès
|
r44784 | /// A path relative to cwd | ||
Raphaël Gomès
|
r42514 | RelPath, | ||
Raphaël Gomès
|
r44784 | /// an unrooted glob (*.rs matches Rust files in all dirs) | ||
Raphaël Gomès
|
r42514 | RelGlob, | ||
Raphaël Gomès
|
r44784 | /// A regexp that needn't match the start of a name | ||
Raphaël Gomès
|
r42514 | RelRegexp, | ||
Raphaël Gomès
|
r44784 | /// A path relative to repository root, which is matched non-recursively | ||
/// (will not match subdirectories) | ||||
Arseniy Alekseyev
|
r52461 | RootFilesIn, | ||
Raphaël Gomès
|
r44785 | /// A file of patterns to read and include | ||
Include, | ||||
/// A file of patterns to match against files under the same directory | ||||
SubInclude, | ||||
Simon Sapin
|
r48170 | /// SubInclude with the result of parsing the included file | ||
/// | ||||
/// Note: there is no ExpandedInclude because that expansion can be done | ||||
/// in place by replacing the Include pattern by the included patterns. | ||||
/// SubInclude requires more handling. | ||||
/// | ||||
/// Note: `Box` is used to minimize size impact on other enum variants | ||||
ExpandedSubInclude(Box<SubInclude>), | ||||
Raphaël Gomès
|
r42514 | } | ||
/// Transforms a glob pattern into a regex | ||||
Georges Racinet
|
r52363 | pub fn glob_to_re(pat: &[u8]) -> Vec<u8> { | ||
Raphaël Gomès
|
r42514 | let mut input = pat; | ||
let mut res: Vec<u8> = vec![]; | ||||
let mut group_depth = 0; | ||||
while let Some((c, rest)) = input.split_first() { | ||||
input = rest; | ||||
match c { | ||||
b'*' => { | ||||
for (source, repl) in GLOB_REPLACEMENTS { | ||||
Valentin Gatien-Baron
|
r43129 | if let Some(rest) = input.drop_prefix(source) { | ||
input = rest; | ||||
Raphaël Gomès
|
r42514 | res.extend(*repl); | ||
break; | ||||
} | ||||
} | ||||
} | ||||
b'?' => res.extend(b"."), | ||||
b'[' => { | ||||
match input.iter().skip(1).position(|b| *b == b']') { | ||||
None => res.extend(b"\\["), | ||||
Some(end) => { | ||||
// Account for the one we skipped | ||||
let end = end + 1; | ||||
res.extend(b"["); | ||||
for (i, b) in input[..end].iter().enumerate() { | ||||
if *b == b'!' && i == 0 { | ||||
res.extend(b"^") | ||||
} else if *b == b'^' && i == 0 { | ||||
res.extend(b"\\^") | ||||
} else if *b == b'\\' { | ||||
res.extend(b"\\\\") | ||||
} else { | ||||
res.push(*b) | ||||
} | ||||
} | ||||
res.extend(b"]"); | ||||
input = &input[end + 1..]; | ||||
} | ||||
} | ||||
} | ||||
b'{' => { | ||||
group_depth += 1; | ||||
res.extend(b"(?:") | ||||
} | ||||
b'}' if group_depth > 0 => { | ||||
group_depth -= 1; | ||||
res.extend(b")"); | ||||
} | ||||
b',' if group_depth > 0 => res.extend(b"|"), | ||||
b'\\' => { | ||||
let c = { | ||||
if let Some((c, rest)) = input.split_first() { | ||||
input = rest; | ||||
c | ||||
} else { | ||||
c | ||||
} | ||||
}; | ||||
Yuya Nishihara
|
r42683 | res.extend(&RE_ESCAPE[*c as usize]) | ||
Raphaël Gomès
|
r42514 | } | ||
Yuya Nishihara
|
r42683 | _ => res.extend(&RE_ESCAPE[*c as usize]), | ||
Raphaël Gomès
|
r42514 | } | ||
} | ||||
res | ||||
} | ||||
fn escape_pattern(pattern: &[u8]) -> Vec<u8> { | ||||
pattern | ||||
.iter() | ||||
Yuya Nishihara
|
r42683 | .flat_map(|c| RE_ESCAPE[*c as usize].clone()) | ||
Raphaël Gomès
|
r42514 | .collect() | ||
} | ||||
Arseniy Alekseyev
|
r52498 | pub fn parse_pattern_syntax_kind( | ||
Raphaël Gomès
|
r44784 | kind: &[u8], | ||
) -> Result<PatternSyntax, PatternError> { | ||||
Raphaël Gomès
|
r42514 | match kind { | ||
Arseniy Alekseyev
|
r52498 | b"re" => Ok(PatternSyntax::Regexp), | ||
b"path" => Ok(PatternSyntax::Path), | ||||
b"filepath" => Ok(PatternSyntax::FilePath), | ||||
b"relpath" => Ok(PatternSyntax::RelPath), | ||||
b"rootfilesin" => Ok(PatternSyntax::RootFilesIn), | ||||
b"relglob" => Ok(PatternSyntax::RelGlob), | ||||
b"relre" => Ok(PatternSyntax::RelRegexp), | ||||
b"glob" => Ok(PatternSyntax::Glob), | ||||
b"rootglob" => Ok(PatternSyntax::RootGlob), | ||||
b"include" => Ok(PatternSyntax::Include), | ||||
b"subinclude" => Ok(PatternSyntax::SubInclude), | ||||
Raphaël Gomès
|
r42514 | _ => Err(PatternError::UnsupportedSyntax( | ||
String::from_utf8_lossy(kind).to_string(), | ||||
)), | ||||
} | ||||
} | ||||
r50499 | lazy_static! { | |||
static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap(); | ||||
} | ||||
Raphaël Gomès
|
r42514 | /// Builds the regex that corresponds to the given pattern. | ||
/// If within a `syntax: regexp` context, returns the pattern, | ||||
/// otherwise, returns the corresponding regex. | ||||
Spencer Baugh
|
r51754 | fn _build_single_regex(entry: &IgnorePattern, glob_suffix: &[u8]) -> Vec<u8> { | ||
Raphaël Gomès
|
r44784 | let IgnorePattern { | ||
syntax, pattern, .. | ||||
} = entry; | ||||
Raphaël Gomès
|
r42514 | if pattern.is_empty() { | ||
return vec![]; | ||||
} | ||||
match syntax { | ||||
Raphaël Gomès
|
r45347 | PatternSyntax::Regexp => pattern.to_owned(), | ||
Raphaël Gomès
|
r42514 | PatternSyntax::RelRegexp => { | ||
Raphaël Gomès
|
r45084 | // The `regex` crate accepts `**` while `re2` and Python's `re` | ||
// do not. Checking for `*` correctly triggers the same error all | ||||
// engines. | ||||
Raphaël Gomès
|
r45348 | if pattern[0] == b'^' | ||
|| pattern[0] == b'*' | ||||
|| pattern.starts_with(b".*") | ||||
{ | ||||
Raphaël Gomès
|
r42514 | return pattern.to_owned(); | ||
} | ||||
r50499 | match FLAG_RE.find(pattern) { | |||
Some(mat) => { | ||||
let s = mat.start(); | ||||
let e = mat.end(); | ||||
[ | ||||
&b"(?"[..], | ||||
&pattern[s + 2..e - 1], | ||||
&b":"[..], | ||||
r50500 | if pattern[e] == b'^' | |||
|| pattern[e] == b'*' | ||||
|| pattern[e..].starts_with(b".*") | ||||
{ | ||||
&b""[..] | ||||
} else { | ||||
&b".*"[..] | ||||
}, | ||||
r50499 | &pattern[e..], | |||
&b")"[..], | ||||
] | ||||
.concat() | ||||
} | ||||
None => [&b".*"[..], pattern].concat(), | ||||
} | ||||
Raphaël Gomès
|
r42514 | } | ||
PatternSyntax::Path | PatternSyntax::RelPath => { | ||||
if pattern == b"." { | ||||
return vec![]; | ||||
} | ||||
Valentin Gatien-Baron
|
r43133 | [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat() | ||
Raphaël Gomès
|
r42514 | } | ||
Arseniy Alekseyev
|
r52461 | PatternSyntax::RootFilesIn => { | ||
Raphaël Gomès
|
r42514 | let mut res = if pattern == b"." { | ||
Raphaël Gomès
|
r45347 | vec![] | ||
Raphaël Gomès
|
r42514 | } else { | ||
// Pattern is a directory name. | ||||
Raphaël Gomès
|
r45347 | [escape_pattern(pattern).as_slice(), b"/"].concat() | ||
Raphaël Gomès
|
r42514 | }; | ||
// Anything after the pattern must be a non-directory. | ||||
res.extend(b"[^/]+$"); | ||||
res | ||||
} | ||||
Valentin Gatien-Baron
|
r43132 | PatternSyntax::RelGlob => { | ||
let glob_re = glob_to_re(pattern); | ||||
if let Some(rest) = glob_re.drop_prefix(b"[^/]*") { | ||||
Spencer Baugh
|
r51754 | [b".*", rest, glob_suffix].concat() | ||
Valentin Gatien-Baron
|
r43132 | } else { | ||
Spencer Baugh
|
r51754 | [b"(?:.*/)?", glob_re.as_slice(), glob_suffix].concat() | ||
Raphaël Gomès
|
r42514 | } | ||
Valentin Gatien-Baron
|
r43132 | } | ||
PatternSyntax::Glob | PatternSyntax::RootGlob => { | ||||
Spencer Baugh
|
r51754 | [glob_to_re(pattern).as_slice(), glob_suffix].concat() | ||
Raphaël Gomès
|
r42514 | } | ||
Simon Sapin
|
r48170 | PatternSyntax::Include | ||
| PatternSyntax::SubInclude | ||||
Raphaël Gomès
|
r51588 | | PatternSyntax::ExpandedSubInclude(_) | ||
| PatternSyntax::FilePath => unreachable!(), | ||||
Raphaël Gomès
|
r42514 | } | ||
} | ||||
/// Bytes that have a special meaning in a glob; a glob containing none of
/// them is a plain literal.
const GLOB_SPECIAL_CHARACTERS: [u8; 7] = *b"*?[]{}\\";
Raphaël Gomès
|
/// Normalizes a `/`-separated path purely lexically: collapses repeated
/// separators, drops `.` components and resolves `..` against the preceding
/// component where there is one. An empty result becomes `.`.
///
/// TODO support other platforms
#[cfg(unix)]
pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
    const SEP: u8 = b'/';

    if bytes.is_empty() {
        return vec![b'.'];
    }

    // POSIX allows one or two initial slashes, but treats three or more
    // as single slash.
    let leading_slashes =
        match bytes.iter().position(|&b| b != SEP).unwrap_or(bytes.len()) {
            count if count > 2 => 1,
            count => count,
        };

    let mut kept: Vec<&[u8]> = Vec::new();
    for component in bytes.split(|&b| b == SEP) {
        match component {
            b"" | b"." => {}
            b".." => {
                // `..` is kept when there is nothing to cancel it against:
                // at the start of a relative path, or after another `..`.
                let keep = match kept.last() {
                    None => leading_slashes == 0,
                    Some(&previous) => previous == b"..",
                };
                if keep {
                    kept.push(component);
                } else {
                    // No-op at the root of an absolute path.
                    kept.pop();
                }
            }
            _ => kept.push(component),
        }
    }

    let mut normalized = vec![SEP; leading_slashes];
    for (index, component) in kept.iter().enumerate() {
        if index > 0 {
            normalized.push(SEP);
        }
        normalized.extend_from_slice(component);
    }
    if normalized.is_empty() {
        normalized.push(b'.');
    }
    normalized
}
Raphaël Gomès
|
r42514 | /// Wrapper function to `_build_single_regex` that short-circuits 'exact' globs | ||
/// that don't need to be transformed into a regex. | ||||
pub fn build_single_regex( | ||||
Raphaël Gomès
|
r44784 | entry: &IgnorePattern, | ||
Spencer Baugh
|
r51754 | glob_suffix: &[u8], | ||
Raphaël Gomès
|
r45311 | ) -> Result<Option<Vec<u8>>, PatternError> { | ||
Raphaël Gomès
|
r44784 | let IgnorePattern { | ||
pattern, syntax, .. | ||||
} = entry; | ||||
let pattern = match syntax { | ||||
PatternSyntax::RootGlob | ||||
| PatternSyntax::Path | ||||
| PatternSyntax::RelGlob | ||||
Spencer Baugh
|
r51751 | | PatternSyntax::RelPath | ||
Arseniy Alekseyev
|
r52461 | | PatternSyntax::RootFilesIn => normalize_path_bytes(pattern), | ||
Raphaël Gomès
|
r44785 | PatternSyntax::Include | PatternSyntax::SubInclude => { | ||
return Err(PatternError::NonRegexPattern(entry.clone())) | ||||
} | ||||
Raphaël Gomès
|
r44784 | _ => pattern.to_owned(), | ||
}; | ||||
Raphaël Gomès
|
r51588 | let is_simple_rootglob = *syntax == PatternSyntax::RootGlob | ||
&& !pattern.iter().any(|b| GLOB_SPECIAL_CHARACTERS.contains(b)); | ||||
if is_simple_rootglob || syntax == &PatternSyntax::FilePath { | ||||
Raphaël Gomès
|
r45311 | Ok(None) | ||
Raphaël Gomès
|
r42514 | } else { | ||
Raphaël Gomès
|
r44784 | let mut entry = entry.clone(); | ||
entry.pattern = pattern; | ||||
Spencer Baugh
|
r51754 | Ok(Some(_build_single_regex(&entry, glob_suffix))) | ||
Raphaël Gomès
|
r42514 | } | ||
} | ||||
lazy_static! { | ||||
Spencer Baugh
|
r51750 | static ref SYNTAXES: FastHashMap<&'static [u8], PatternSyntax> = { | ||
Raphaël Gomès
|
r44278 | let mut m = FastHashMap::default(); | ||
Raphaël Gomès
|
r42514 | |||
Spencer Baugh
|
r51750 | m.insert(b"re:".as_ref(), PatternSyntax::Regexp); | ||
m.insert(b"regexp:".as_ref(), PatternSyntax::Regexp); | ||||
m.insert(b"path:".as_ref(), PatternSyntax::Path); | ||||
m.insert(b"filepath:".as_ref(), PatternSyntax::FilePath); | ||||
m.insert(b"relpath:".as_ref(), PatternSyntax::RelPath); | ||||
Arseniy Alekseyev
|
r52461 | m.insert(b"rootfilesin:".as_ref(), PatternSyntax::RootFilesIn); | ||
Spencer Baugh
|
r51750 | m.insert(b"relglob:".as_ref(), PatternSyntax::RelGlob); | ||
m.insert(b"relre:".as_ref(), PatternSyntax::RelRegexp); | ||||
m.insert(b"glob:".as_ref(), PatternSyntax::Glob); | ||||
m.insert(b"rootglob:".as_ref(), PatternSyntax::RootGlob); | ||||
m.insert(b"include:".as_ref(), PatternSyntax::Include); | ||||
m.insert(b"subinclude:".as_ref(), PatternSyntax::SubInclude); | ||||
Raphaël Gomès
|
r42514 | m | ||
}; | ||||
} | ||||
Raphaël Gomès
|
/// Non-fatal problems found while reading a pattern file.
#[derive(Debug)]
pub enum PatternFileWarning {
    /// A `syntax:` line named an unknown syntax: `(file path, syntax bytes)`
    InvalidSyntax(PathBuf, Vec<u8>),
    /// The pattern file does not exist; carries the file path
    NoSuchFile(PathBuf),
}
Raphaël Gomès
|
r42514 | |||
Spencer Baugh
|
r51750 | pub fn parse_one_pattern( | ||
pattern: &[u8], | ||||
source: &Path, | ||||
default: PatternSyntax, | ||||
Spencer Baugh
|
r51759 | normalize: bool, | ||
Spencer Baugh
|
r51750 | ) -> IgnorePattern { | ||
let mut pattern_bytes: &[u8] = pattern; | ||||
let mut syntax = default; | ||||
for (s, val) in SYNTAXES.iter() { | ||||
if let Some(rest) = pattern_bytes.drop_prefix(s) { | ||||
syntax = val.clone(); | ||||
pattern_bytes = rest; | ||||
break; | ||||
} | ||||
} | ||||
Spencer Baugh
|
r51759 | let pattern = match syntax { | ||
PatternSyntax::RootGlob | ||||
| PatternSyntax::Path | ||||
| PatternSyntax::Glob | ||||
| PatternSyntax::RelGlob | ||||
| PatternSyntax::RelPath | ||||
Arseniy Alekseyev
|
r52461 | | PatternSyntax::RootFilesIn | ||
Spencer Baugh
|
r51759 | if normalize => | ||
{ | ||||
normalize_path_bytes(pattern_bytes) | ||||
} | ||||
_ => pattern_bytes.to_vec(), | ||||
}; | ||||
Spencer Baugh
|
r51750 | |||
IgnorePattern { | ||||
syntax, | ||||
pattern, | ||||
source: source.to_owned(), | ||||
} | ||||
} | ||||
Simon Sapin
|
r48169 | pub fn parse_pattern_file_contents( | ||
Raphaël Gomès
|
r42630 | lines: &[u8], | ||
Simon Sapin
|
r48169 | file_path: &Path, | ||
Spencer Baugh
|
r51750 | default_syntax_override: Option<PatternSyntax>, | ||
Raphaël Gomès
|
r42514 | warn: bool, | ||
Spencer Baugh
|
r51750 | relativize: bool, | ||
Raphaël Gomès
|
r44784 | ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { | ||
Raphaël Gomès
|
r42514 | let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap(); | ||
Raphaël Gomès
|
r45500 | |||
#[allow(clippy::trivial_regex)] | ||||
Yuya Nishihara
|
r42859 | let comment_escape_regex = Regex::new(r"\\#").unwrap(); | ||
Raphaël Gomès
|
r44784 | let mut inputs: Vec<IgnorePattern> = vec![]; | ||
let mut warnings: Vec<PatternFileWarning> = vec![]; | ||||
Raphaël Gomès
|
r42514 | |||
Raphaël Gomès
|
r50377 | let mut current_syntax = | ||
Spencer Baugh
|
r51750 | default_syntax_override.unwrap_or(PatternSyntax::RelRegexp); | ||
Raphaël Gomès
|
r42514 | |||
Spencer Baugh
|
r51750 | for mut line in lines.split(|c| *c == b'\n') { | ||
Yuya Nishihara
|
r42859 | let line_buf; | ||
Yuya Nishihara
|
r42858 | if line.contains(&b'#') { | ||
Raphaël Gomès
|
r42630 | if let Some(cap) = comment_regex.captures(line) { | ||
line = &line[..cap.get(1).unwrap().end()] | ||||
Raphaël Gomès
|
r42514 | } | ||
Yuya Nishihara
|
r42859 | line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#")); | ||
line = &line_buf; | ||||
Raphaël Gomès
|
r42514 | } | ||
Spencer Baugh
|
r51750 | let line = line.trim_end(); | ||
Raphaël Gomès
|
r42514 | |||
if line.is_empty() { | ||||
continue; | ||||
} | ||||
Valentin Gatien-Baron
|
r43129 | if let Some(syntax) = line.drop_prefix(b"syntax:") { | ||
let syntax = syntax.trim(); | ||||
Raphaël Gomès
|
r42514 | |||
Spencer Baugh
|
r51750 | if let Some(parsed) = | ||
SYNTAXES.get([syntax, &b":"[..]].concat().as_slice()) | ||||
{ | ||||
current_syntax = parsed.clone(); | ||||
Raphaël Gomès
|
r42514 | } else if warn { | ||
Raphaël Gomès
|
r44784 | warnings.push(PatternFileWarning::InvalidSyntax( | ||
Simon Sapin
|
r48169 | file_path.to_owned(), | ||
Raphaël Gomès
|
r44784 | syntax.to_owned(), | ||
)); | ||||
Raphaël Gomès
|
r42514 | } | ||
Spencer Baugh
|
r51750 | } else { | ||
let pattern = parse_one_pattern( | ||||
line, | ||||
file_path, | ||||
current_syntax.clone(), | ||||
Spencer Baugh
|
r51759 | false, | ||
Spencer Baugh
|
r51750 | ); | ||
inputs.push(if relativize { | ||||
pattern.to_relative() | ||||
} else { | ||||
pattern | ||||
}) | ||||
Raphaël Gomès
|
r42514 | } | ||
} | ||||
Raphaël Gomès
|
r44784 | Ok((inputs, warnings)) | ||
Raphaël Gomès
|
r42514 | } | ||
Spencer Baugh
|
r51759 | pub fn parse_pattern_args( | ||
patterns: Vec<Vec<u8>>, | ||||
cwd: &Path, | ||||
root: &Path, | ||||
) -> Result<Vec<IgnorePattern>, HgPathError> { | ||||
let mut ignore_patterns: Vec<IgnorePattern> = Vec::new(); | ||||
for pattern in patterns { | ||||
let pattern = parse_one_pattern( | ||||
&pattern, | ||||
Path::new("<args>"), | ||||
PatternSyntax::RelPath, | ||||
true, | ||||
); | ||||
match pattern.syntax { | ||||
PatternSyntax::RelGlob | PatternSyntax::RelPath => { | ||||
let name = get_path_from_bytes(&pattern.pattern); | ||||
let canon = canonical_path(root, cwd, name)?; | ||||
ignore_patterns.push(IgnorePattern { | ||||
syntax: pattern.syntax, | ||||
pattern: get_bytes_from_path(canon), | ||||
source: pattern.source, | ||||
}) | ||||
} | ||||
_ => ignore_patterns.push(pattern.to_owned()), | ||||
}; | ||||
} | ||||
Ok(ignore_patterns) | ||||
} | ||||
Simon Sapin
|
r48169 | pub fn read_pattern_file( | ||
file_path: &Path, | ||||
Raphaël Gomès
|
r42514 | warn: bool, | ||
Raphaël Gomès
|
r50453 | inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), | ||
Raphaël Gomès
|
r44784 | ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> { | ||
Simon Sapin
|
r48202 | match std::fs::read(file_path) { | ||
Ok(contents) => { | ||||
Raphaël Gomès
|
r50453 | inspect_pattern_bytes(file_path, &contents); | ||
Spencer Baugh
|
r51750 | parse_pattern_file_contents(&contents, file_path, None, warn, true) | ||
Simon Sapin
|
r48202 | } | ||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(( | ||||
vec![], | ||||
vec![PatternFileWarning::NoSuchFile(file_path.to_owned())], | ||||
)), | ||||
Err(e) => Err(e.into()), | ||||
} | ||||
Raphaël Gomès
|
r44784 | } | ||
/// Represents an entry in an "ignore" file. | ||||
#[derive(Debug, Eq, PartialEq, Clone)] | ||||
pub struct IgnorePattern { | ||||
pub syntax: PatternSyntax, | ||||
pub pattern: Vec<u8>, | ||||
pub source: PathBuf, | ||||
Raphaël Gomès
|
r42514 | } | ||
Raphaël Gomès
|
r44784 | impl IgnorePattern { | ||
Simon Sapin
|
r48169 | pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self { | ||
Raphaël Gomès
|
r44784 | Self { | ||
syntax, | ||||
pattern: pattern.to_owned(), | ||||
Simon Sapin
|
r48169 | source: source.to_owned(), | ||
Raphaël Gomès
|
r44784 | } | ||
} | ||||
Spencer Baugh
|
r51750 | |||
pub fn to_relative(self) -> Self { | ||||
let Self { | ||||
syntax, | ||||
pattern, | ||||
source, | ||||
} = self; | ||||
Self { | ||||
syntax: match syntax { | ||||
PatternSyntax::Regexp => PatternSyntax::RelRegexp, | ||||
PatternSyntax::Glob => PatternSyntax::RelGlob, | ||||
x => x, | ||||
}, | ||||
pattern, | ||||
source, | ||||
} | ||||
} | ||||
Raphaël Gomès
|
r44784 | } | ||
pub type PatternResult<T> = Result<T, PatternError>; | ||||
Raphaël Gomès
|
r44785 | /// Wrapper for `read_pattern_file` that also recursively expands `include:` | ||
Simon Sapin
|
r48170 | /// and `subinclude:` patterns. | ||
Raphaël Gomès
|
r44785 | /// | ||
Simon Sapin
|
r48170 | /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude` | ||
/// is used for the latter to form a tree of patterns. | ||||
Raphaël Gomès
|
r44785 | pub fn get_patterns_from_file( | ||
Simon Sapin
|
r48169 | pattern_file: &Path, | ||
root_dir: &Path, | ||||
Raphaël Gomès
|
r50453 | inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]), | ||
Raphaël Gomès
|
r44785 | ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> { | ||
Simon Sapin
|
r48202 | let (patterns, mut warnings) = | ||
read_pattern_file(pattern_file, true, inspect_pattern_bytes)?; | ||||
Raphaël Gomès
|
r44785 | let patterns = patterns | ||
.into_iter() | ||||
.flat_map(|entry| -> PatternResult<_> { | ||||
Simon Sapin
|
r48170 | Ok(match &entry.syntax { | ||
Raphaël Gomès
|
r44785 | PatternSyntax::Include => { | ||
let inner_include = | ||||
Simon Sapin
|
r48170 | root_dir.join(get_path_from_bytes(&entry.pattern)); | ||
Simon Sapin
|
r48202 | let (inner_pats, inner_warnings) = get_patterns_from_file( | ||
&inner_include, | ||||
root_dir, | ||||
inspect_pattern_bytes, | ||||
)?; | ||||
Raphaël Gomès
|
r44785 | warnings.extend(inner_warnings); | ||
inner_pats | ||||
} | ||||
Simon Sapin
|
r48170 | PatternSyntax::SubInclude => { | ||
let mut sub_include = SubInclude::new( | ||||
Raphaël Gomès
|
r50825 | root_dir, | ||
Simon Sapin
|
r48170 | &entry.pattern, | ||
&entry.source, | ||||
)?; | ||||
let (inner_patterns, inner_warnings) = | ||||
get_patterns_from_file( | ||||
&sub_include.path, | ||||
&sub_include.root, | ||||
Simon Sapin
|
r48202 | inspect_pattern_bytes, | ||
Simon Sapin
|
r48170 | )?; | ||
sub_include.included_patterns = inner_patterns; | ||||
warnings.extend(inner_warnings); | ||||
vec![IgnorePattern { | ||||
syntax: PatternSyntax::ExpandedSubInclude(Box::new( | ||||
sub_include, | ||||
)), | ||||
..entry | ||||
}] | ||||
} | ||||
Raphaël Gomès
|
r44785 | _ => vec![entry], | ||
}) | ||||
}) | ||||
.flatten() | ||||
.collect(); | ||||
Ok((patterns, warnings)) | ||||
} | ||||
/// Holds all the information needed to handle a `subinclude:` pattern. | ||||
Simon Sapin
|
r48170 | #[derive(Debug, PartialEq, Eq, Clone)] | ||
Raphaël Gomès
|
r44785 | pub struct SubInclude { | ||
/// Will be used for repository (hg) paths that start with this prefix. | ||||
/// It is relative to the current working directory, so comparing against | ||||
/// repository paths is painless. | ||||
pub prefix: HgPathBuf, | ||||
/// The file itself, containing the patterns | ||||
pub path: PathBuf, | ||||
/// Folder in the filesystem where this it applies | ||||
pub root: PathBuf, | ||||
Simon Sapin
|
r48170 | |||
pub included_patterns: Vec<IgnorePattern>, | ||||
Raphaël Gomès
|
r44785 | } | ||
impl SubInclude { | ||||
pub fn new( | ||||
Simon Sapin
|
r48169 | root_dir: &Path, | ||
Raphaël Gomès
|
r44785 | pattern: &[u8], | ||
Simon Sapin
|
r48169 | source: &Path, | ||
Raphaël Gomès
|
r44785 | ) -> Result<SubInclude, HgPathError> { | ||
let normalized_source = | ||||
normalize_path_bytes(&get_bytes_from_path(source)); | ||||
let source_root = get_path_from_bytes(&normalized_source); | ||||
Raphaël Gomès
|
r52013 | let source_root = source_root.parent().unwrap_or(source_root); | ||
Raphaël Gomès
|
r44785 | |||
let path = source_root.join(get_path_from_bytes(pattern)); | ||||
Raphaël Gomès
|
r45500 | let new_root = path.parent().unwrap_or_else(|| path.deref()); | ||
Raphaël Gomès
|
r44785 | |||
Simon Sapin
|
r48169 | let prefix = canonical_path(root_dir, root_dir, new_root)?; | ||
Raphaël Gomès
|
r44785 | |||
Ok(Self { | ||||
Raphaël Gomès
|
r50825 | prefix: path_to_hg_path_buf(prefix).map(|mut p| { | ||
Raphaël Gomès
|
r44785 | if !p.is_empty() { | ||
Arseniy Alekseyev
|
r49132 | p.push_byte(b'/'); | ||
Raphaël Gomès
|
r44785 | } | ||
Raphaël Gomès
|
r50825 | p | ||
Raphaël Gomès
|
r44785 | })?, | ||
path: path.to_owned(), | ||||
root: new_root.to_owned(), | ||||
Simon Sapin
|
r48170 | included_patterns: Vec::new(), | ||
Raphaël Gomès
|
r44785 | }) | ||
} | ||||
} | ||||
/// Separate and pre-process subincludes from other patterns for the "ignore" | ||||
/// phase. | ||||
Simon Sapin
|
r48170 | pub fn filter_subincludes( | ||
ignore_patterns: Vec<IgnorePattern>, | ||||
Raphaël Gomès
|
r50823 | ) -> Result<(Vec<SubInclude>, Vec<IgnorePattern>), HgPathError> { | ||
Raphaël Gomès
|
r44785 | let mut subincludes = vec![]; | ||
let mut others = vec![]; | ||||
Simon Sapin
|
r48170 | for pattern in ignore_patterns { | ||
if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax | ||||
{ | ||||
Raphaël Gomès
|
r50823 | subincludes.push(*sub_include); | ||
Raphaël Gomès
|
r44785 | } else { | ||
Simon Sapin
|
r48170 | others.push(pattern) | ||
Raphaël Gomès
|
r44785 | } | ||
} | ||||
Ok((subincludes, others)) | ||||
} | ||||
Raphaël Gomès
|
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Glob suffix used by all the regex-building tests.
    const DIR_SUFFIX: &[u8] = b"(?:/|$)";

    /// Builds the regex for a pattern with an empty source path.
    fn regex_for(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
        let entry = IgnorePattern::new(syntax, pattern, Path::new(""));
        build_single_regex(&entry, DIR_SUFFIX).unwrap()
    }

    /// Parses pattern file contents the way an ignore file is parsed,
    /// without warnings.
    fn parse(lines: &[u8]) -> Vec<IgnorePattern> {
        parse_pattern_file_contents(
            lines,
            Path::new("file_path"),
            None,
            false,
            true,
        )
        .unwrap()
        .0
    }

    #[test]
    fn escape_pattern_test() {
        let untouched =
            br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
        assert_eq!(escape_pattern(untouched), untouched.to_vec());
        // All escape codes
        assert_eq!(
            escape_pattern(br"()[]{}?*+-|^$\\.&~#\t\n\r\v\f"),
            br"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\\t\\n\\r\\v\\f".to_vec()
        );
    }

    #[test]
    fn glob_test() {
        let cases: [(&[u8], &[u8]); 8] = [
            (&br"?"[..], &br"."[..]),
            (&br"*"[..], &br"[^/]*"[..]),
            (&br"**"[..], &br".*"[..]),
            (&br"**/a"[..], &br"(?:.*/)?a"[..]),
            (&br"a/**/b"[..], &br"a/(?:.*/)?b"[..]),
            (&br"[a*?!^][^b][!c]"[..], &br"[a*?!^][\^b][^c]"[..]),
            (&br"{a,b}"[..], &br"(?:a|b)"[..]),
            (&br".\*\?"[..], &br"\.\*\?"[..]),
        ];
        for (glob, expected) in cases {
            assert_eq!(glob_to_re(glob), expected);
        }
    }

    #[test]
    fn test_parse_pattern_file_contents() {
        // A `syntax:` line applies to the following patterns.
        assert_eq!(
            parse(b"syntax: glob\n*.elc"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"*.elc",
                Path::new("file_path")
            )],
        );

        // `syntax:` lines alone produce no pattern.
        assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);

        // An explicit prefix needs no `syntax:` line.
        assert_eq!(
            parse(b"glob:**.o"),
            vec![IgnorePattern::new(
                PatternSyntax::RelGlob,
                b"**.o",
                Path::new("file_path")
            )]
        );
    }

    #[test]
    fn test_build_single_regex() {
        assert_eq!(
            regex_for(PatternSyntax::RelGlob, b"rust/target/"),
            Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::Regexp, br"rust/target/\d+"),
            Some(br"rust/target/\d+".to_vec()),
        );
    }

    #[test]
    fn test_build_single_regex_shortcut() {
        assert_eq!(regex_for(PatternSyntax::RootGlob, b""), None);
        assert_eq!(regex_for(PatternSyntax::RootGlob, b"whatever"), None);
        assert_eq!(
            regex_for(PatternSyntax::RootGlob, b"*.o"),
            Some(br"[^/]*\.o(?:/|$)".to_vec()),
        );
    }

    #[test]
    fn test_build_single_relregex() {
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"^ba{2}r"),
            Some(b"^ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"ba{2}r"),
            Some(b".*ba{2}r".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)ba{2}r"),
            Some(b"(?ia:.*ba{2}r)".to_vec()),
        );
        assert_eq!(
            regex_for(PatternSyntax::RelRegexp, b"(?ia)^ba{2}r"),
            Some(b"(?ia:^ba{2}r)".to_vec()),
        );
    }
}