upstream/mercurial-mirror Commit - r50499:086b0c4f

matcher: fix the issue with regex inline-flag in rust oo...

marmoute -

r50499:086b0c4f stable

parent child

rust/hg-core/src/filepatterns.rs

0 +51 -1

              // filepatterns.rs
              //
              // Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
              //
              // This software may be used and distributed according to the terms of the
              // GNU General Public License version 2 or any later version.
              //! Handling of Mercurial-specific patterns.
              use crate::{
                  utils::{
                      files::{canonical_path, get_bytes_from_path, get_path_from_bytes},
                      hg_path::{path_to_hg_path_buf, HgPathBuf, HgPathError},
                      SliceExt,
                  },
                  FastHashMap, PatternError,
              };
              use lazy_static::lazy_static;
              use regex::bytes::{NoExpand, Regex};
              use std::ops::Deref;
              use std::path::{Path, PathBuf};
              use std::vec::Vec;
              lazy_static! {
                  /// Lookup table indexed by byte value: entry `b` holds the regex-safe
                  /// spelling of byte `b`, i.e. the byte itself, preceded by a backslash
                  /// when it is one of the special bytes listed below.
                  static ref RE_ESCAPE: Vec<Vec<u8>> = {
                      let special: &[u8] = b"()[]{}?*+-|^$\\.&~# \t\n\r\x0b\x0c";
                      (0..=255u8)
                          .map(|byte| {
                              if special.contains(&byte) {
                                  vec![b'\\', byte]
                              } else {
                                  vec![byte]
                              }
                          })
                          .collect()
                  };
              }
              /// Replacements applied by `glob_to_re` to the input that follows a `*`:
              /// `**/` becomes `(?:.*/)?`, `**` becomes `.*`, and a lone `*` (the empty
              /// prefix, which is the fallback entry) becomes `[^/]*`.
              ///
              /// These are matched in order
              const GLOB_REPLACEMENTS: &[(&[u8], &[u8])] =
                  &[(b"*/", b"(?:.*/)?"), (b"*", b".*"), (b"", b"[^/]*")];
              /// Appended to the regexp of globs
              ///
              /// It matches either a `/` or the end of the input.
              const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
              /// The kind of a pattern, i.e. how its bytes are to be interpreted.
              ///
              /// `parse_pattern_syntax` maps the textual prefixes (`re:`, `glob:`, ...)
              /// to these variants.
              #[derive(Debug, Clone, PartialEq, Eq)]
              pub enum PatternSyntax {
                  /// A regular expression
                  Regexp,
                  /// Glob that matches at the front of the path
                  RootGlob,
                  /// Glob that matches at any suffix of the path (still anchored at
                  /// slashes)
                  Glob,
                  /// a path relative to repository root, which is matched recursively
                  Path,
                  /// A path relative to cwd
                  RelPath,
                  /// an unrooted glob (*.rs matches Rust files in all dirs)
                  RelGlob,
                  /// A regexp that needn't match the start of a name
                  RelRegexp,
                  /// A path relative to repository root, which is matched non-recursively
                  /// (will not match subdirectories)
                  RootFiles,
                  /// A file of patterns to read and include
                  Include,
                  /// A file of patterns to match against files under the same directory
                  SubInclude,
                  /// SubInclude with the result of parsing the included file
                  ///
                  /// Note: there is no ExpandedInclude because that expansion can be done
                  /// in place by replacing the Include pattern by the included patterns.
                  /// SubInclude requires more handling.
                  ///
                  /// Note: `Box` is used to minimize size impact on other enum variants
                  ExpandedSubInclude(Box<SubInclude>),
              }
              /// Transforms a glob pattern into a regex
              ///
              /// The result is the regex source as bytes; nothing is compiled or
              /// validated here. The pattern is consumed one byte at a time:
              ///
              /// - `*` is rewritten according to `GLOB_REPLACEMENTS`;
              /// - `?` becomes `.`;
              /// - `[...]` becomes a regex character class, or an escaped `[` when no
              ///   closing `]` is found;
              /// - `{`, `,` and `}` become `(?:`, `|` and `)` (the latter two only while
              ///   a `{` group is open);
              /// - `\` escapes the byte that follows it;
              /// - every other byte is emitted through `RE_ESCAPE`.
              fn glob_to_re(pat: &[u8]) -> Vec<u8> {
                  let mut input = pat;
                  let mut res: Vec<u8> = vec![];
                  // Number of `{` groups currently open.
                  let mut group_depth = 0;
                  while let Some((c, rest)) = input.split_first() {
                      input = rest;
                      match c {
                          b'*' => {
                              // Look at what follows the `*` and use the first
                              // replacement whose source is a prefix of it.
                              for (source, repl) in GLOB_REPLACEMENTS {
                                  if let Some(rest) = input.drop_prefix(source) {
                                      input = rest;
                                      res.extend(*repl);
                                      break;
                                  }
                              }
                          }
                          b'?' => res.extend(b"."),
                          b'[' => {
                              // The first byte after `[` is skipped in the search, so a
                              // `]` in that position is part of the class rather than
                              // its end.
                              match input.iter().skip(1).position(|b| *b == b']') {
                                  // No closing bracket: treat `[` as a literal.
                                  None => res.extend(b"\\["),
                                  Some(end) => {
                                      // Account for the one we skipped
                                      let end = end + 1;
                                      res.extend(b"[");
                                      for (i, b) in input[..end].iter().enumerate() {
                                          if *b == b'!' && i == 0 {
                                              // Leading `!` is the glob negation.
                                              res.extend(b"^")
                                          } else if *b == b'^' && i == 0 {
                                              // A leading `^` is literal in a glob.
                                              res.extend(b"\\^")
                                          } else if *b == b'\\' {
                                              res.extend(b"\\\\")
                                          } else {
                                              res.push(*b)
                                          }
                                      }
                                      res.extend(b"]");
                                      // Resume after the closing `]`.
                                      input = &input[end + 1..];
                                  }
                              }
                          }
                          b'{' => {
                              group_depth += 1;
                              res.extend(b"(?:")
                          }
                          b'}' if group_depth > 0 => {
                              group_depth -= 1;
                              res.extend(b")");
                          }
                          b',' if group_depth > 0 => res.extend(b"|"),
                          b'\\' => {
                              // Escape the next byte if there is one; a trailing
                              // backslash ends up escaping itself.
                              let c = {
                                  if let Some((c, rest)) = input.split_first() {
                                      input = rest;
                                      c
                                  } else {
                                      c
                                  }
                              };
                              res.extend(&RE_ESCAPE[*c as usize])
                          }
                          _ => res.extend(&RE_ESCAPE[*c as usize]),
                      }
                  }
                  res
              }
              /// Returns `pattern` with every byte replaced by its `RE_ESCAPE` entry, so
              /// that the result matches `pattern` literally when used as a regex.
              fn escape_pattern(pattern: &[u8]) -> Vec<u8> {
                  let mut escaped = Vec::new();
                  for byte in pattern {
                      escaped.extend_from_slice(&RE_ESCAPE[*byte as usize]);
                  }
                  escaped
              }
              /// Maps a syntax prefix (including its trailing colon, e.g. `b"glob:"`) to
              /// the corresponding `PatternSyntax`.
              ///
              /// # Errors
              ///
              /// Returns `PatternError::UnsupportedSyntax` carrying the lossily decoded
              /// prefix when `kind` is not one of the known prefixes.
              pub fn parse_pattern_syntax(
                  kind: &[u8],
              ) -> Result<PatternSyntax, PatternError> {
                  let syntax = match kind {
                      b"re:" => PatternSyntax::Regexp,
                      b"path:" => PatternSyntax::Path,
                      b"relpath:" => PatternSyntax::RelPath,
                      b"rootfilesin:" => PatternSyntax::RootFiles,
                      b"relglob:" => PatternSyntax::RelGlob,
                      b"relre:" => PatternSyntax::RelRegexp,
                      b"glob:" => PatternSyntax::Glob,
                      b"rootglob:" => PatternSyntax::RootGlob,
                      b"include:" => PatternSyntax::Include,
                      b"subinclude:" => PatternSyntax::SubInclude,
                      _ => {
                          let unknown = String::from_utf8_lossy(kind).to_string();
                          return Err(PatternError::UnsupportedSyntax(unknown));
                      }
                  };
                  Ok(syntax)
              }
+             lazy_static! {
                  /// Matches an inline-flag group at the very start of a pattern: `(?`,
                  /// one or more of the letters `a i L m s u x`, then `)`.
                  ///
                  /// NOTE(review): this letter set looks like Python's `re` flags; the
                  /// `regex` crate may not accept all of them (e.g. `a`, `L`) — confirm.
+                 static ref FLAG_RE: Regex = Regex::new(r"^\(\?[aiLmsux]+\)").unwrap();
+             }
              /// Builds the regex that corresponds to the given pattern.
              /// If within a `syntax: regexp` context, returns the pattern,
              /// otherwise, returns the corresponding regex.
              ///
              /// An empty pattern always yields an empty regex. For `RelRegexp`, a
              /// pattern that does not already start with `^`, `*` or `.*` gets a `.*`
              /// prefix; when it starts with an inline-flag group matched by `FLAG_RE`,
              /// `(?flags)rest` is rewritten as `(?flags:.*rest)` so that the flags stay
              /// in front of the inserted `.*`.
              ///
              /// # Panics
              ///
              /// Reaches `unreachable!()` for `Include`, `SubInclude` and
              /// `ExpandedSubInclude` entries.
              fn _build_single_regex(entry: &IgnorePattern) -> Vec<u8> {
                  let IgnorePattern {
                      syntax, pattern, ..
                  } = entry;
                  if pattern.is_empty() {
                      return vec![];
                  }
                  match syntax {
                      PatternSyntax::Regexp => pattern.to_owned(),
                      PatternSyntax::RelRegexp => {
                          // The `regex` crate accepts `**` while `re2` and Python's `re`
                          // do not. Checking for `*` correctly triggers the same error in
                          // all engines.
                          if pattern[0] == b'^'
                              || pattern[0] == b'*'
                              || pattern.starts_with(b".*")
                          {
                              return pattern.to_owned();
                          }
-                         [&b".*"[..], pattern].concat()
+                         match FLAG_RE.find(pattern) {
+                             Some(mat) => {
+                                 let s = mat.start();
+                                 let e = mat.end();
+                                 [
+                                     &b"(?"[..],
+                                     &pattern[s + 2..e - 1],
+                                     &b":"[..],
+                                     &b".*"[..],
+                                     &pattern[e..],
+                                     &b")"[..],
+                                 ]
+                                 .concat()
+                             }
+                             None => [&b".*"[..], pattern].concat(),
+                         }
                      }
                      PatternSyntax::Path | PatternSyntax::RelPath => {
                          // `.` yields an empty regex.
                          if pattern == b"." {
                              return vec![];
                          }
                          // Literal path followed by `/` or the end of the input.
                          [escape_pattern(pattern).as_slice(), b"(?:/|$)"].concat()
                      }
                      PatternSyntax::RootFiles => {
                          let mut res = if pattern == b"." {
                              vec![]
                          } else {
                              // Pattern is a directory name.
                              [escape_pattern(pattern).as_slice(), b"/"].concat()
                          };
                          // Anything after the pattern must be a non-directory.
                          res.extend(b"[^/]+$");
                          res
                      }
                      PatternSyntax::RelGlob => {
                          let glob_re = glob_to_re(pattern);
                          // A glob regex starting with `[^/]*` (a leading lone `*`) has
                          // that prefix widened to `.*`; any other glob gets an
                          // optional `.*/` directory prefix instead.
                          if let Some(rest) = glob_re.drop_prefix(b"[^/]*") {
                              [b".*", rest, GLOB_SUFFIX].concat()
                          } else {
                              [b"(?:.*/)?", glob_re.as_slice(), GLOB_SUFFIX].concat()
                          }
                      }
                      PatternSyntax::Glob | PatternSyntax::RootGlob => {
                          [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
                      }
                      PatternSyntax::Include
                      | PatternSyntax::SubInclude
                      | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
                  }
              }
              /// Bytes that `build_single_regex` looks for in a `RootGlob` pattern: when
              /// none of them is present, no regex is built for the pattern.
              const GLOB_SPECIAL_CHARACTERS: [u8; 7] =
                  [b'*', b'?', b'[', b']', b'{', b'}', b'\\'];
              /// Normalizes a `/`-separated path given as raw bytes.
              ///
              /// Empty components and `.` components are removed, `..` cancels the
              /// preceding regular component, and one or two leading slashes are kept
              /// (three or more collapse to one). An input that is empty, or that
              /// normalizes to nothing, yields `.`.
              ///
              /// TODO support other platforms
              #[cfg(unix)]
              pub fn normalize_path_bytes(bytes: &[u8]) -> Vec<u8> {
                  const SEP: u8 = b'/';
                  if bytes.is_empty() {
                      return b".".to_vec();
                  }
                  // POSIX allows one or two initial slashes, but treats three or more
                  // as single slash.
                  let slash_run =
                      bytes.iter().position(|b| *b != SEP).unwrap_or(bytes.len());
                  let leading = if slash_run > 2 { 1 } else { slash_run };
                  let is_absolute = leading > 0;

                  let mut kept: Vec<&[u8]> = Vec::new();
                  for part in bytes.split(|b| *b == SEP) {
                      if part.is_empty() || part == b"." {
                          continue;
                      }
                      if part != b".." {
                          kept.push(part);
                          continue;
                      }
                      // `part` is `..`: decide from what is currently on top.
                      match kept.last().map(|prev| *prev == b"..") {
                          // A regular component is cancelled out.
                          Some(false) => {
                              kept.pop();
                          }
                          // `..` chains accumulate.
                          Some(true) => kept.push(part),
                          // Nothing to cancel: keep it for relative paths only.
                          None if !is_absolute => kept.push(part),
                          None => {}
                      }
                  }

                  let mut normalized = vec![SEP; leading];
                  normalized.extend(kept.join(&SEP));
                  if normalized.is_empty() {
                      return b".".to_vec();
                  }
                  normalized
              }
              /// Front end of `_build_single_regex`.
              ///
              /// The pattern of `RootGlob`, `Path`, `RelGlob` and `RootFiles` entries is
              /// first run through `normalize_path_bytes`. A `RootGlob` whose normalized
              /// pattern contains none of `GLOB_SPECIAL_CHARACTERS` is an 'exact' glob
              /// that needs no regex, and `Ok(None)` is returned for it.
              ///
              /// # Errors
              ///
              /// Returns `PatternError::NonRegexPattern` for `Include` and `SubInclude`
              /// entries.
              pub fn build_single_regex(
                  entry: &IgnorePattern,
              ) -> Result<Option<Vec<u8>>, PatternError> {
                  let normalized = match &entry.syntax {
                      PatternSyntax::Include | PatternSyntax::SubInclude => {
                          return Err(PatternError::NonRegexPattern(entry.clone()))
                      }
                      PatternSyntax::RootGlob
                      | PatternSyntax::Path
                      | PatternSyntax::RelGlob
                      | PatternSyntax::RootFiles => normalize_path_bytes(&entry.pattern),
                      _ => entry.pattern.to_owned(),
                  };

                  let is_exact_rootglob = entry.syntax == PatternSyntax::RootGlob
                      && normalized
                          .iter()
                          .all(|b| !GLOB_SPECIAL_CHARACTERS.contains(b));
                  if is_exact_rootglob {
                      return Ok(None);
                  }

                  // Same entry, carrying the normalized pattern.
                  let rewritten = IgnorePattern {
                      pattern: normalized,
                      ..entry.clone()
                  };
                  Ok(Some(_build_single_regex(&rewritten)))
              }
              lazy_static! {
                  /// Maps a syntax name, as written after `syntax:` or before a `:` at
                  /// the start of a pattern line, to the pattern prefix it selects.
                  static ref SYNTAXES: FastHashMap<&'static [u8], &'static [u8]> = {
                      let table: [(&'static [u8], &'static [u8]); 8] = [
                          (b"re", b"relre:"),
                          (b"regexp", b"relre:"),
                          (b"glob", b"relglob:"),
                          (b"rootglob", b"rootglob:"),
                          (b"include", b"include:"),
                          (b"subinclude", b"subinclude:"),
                          (b"path", b"path:"),
                          (b"rootfilesin", b"rootfilesin:"),
                      ];
                      let mut map = FastHashMap::default();
                      for (name, prefix) in table.iter() {
                          map.insert(*name, *prefix);
                      }
                      map
                  };
              }
              /// Non-fatal problems collected while reading pattern files; they are
              /// returned next to the parsed patterns instead of aborting the parse.
              #[derive(Debug)]
              pub enum PatternFileWarning {
                  /// (file path, syntax bytes)
                  ///
                  /// A `syntax:` line named a syntax that is not in `SYNTAXES`.
                  InvalidSyntax(PathBuf, Vec<u8>),
                  /// File path
                  ///
                  /// The pattern file could not be found.
                  NoSuchFile(PathBuf),
              }
              /// Parses the contents of a pattern file into a list of `IgnorePattern`s.
              ///
              /// - `lines`: the raw file contents; they are split on `\n`.
              /// - `file_path`: recorded as the source of every pattern and used in
              ///   warnings and errors.
              /// - `default_syntax_override`: prefix (e.g. `b"glob:"`) used for lines
              ///   that carry no prefix of their own, until a `syntax:` line changes
              ///   it; defaults to `relre:`.
              /// - `warn`: whether unknown `syntax:` names are reported as warnings.
              ///
              /// Returns the patterns in file order together with the warnings.
              ///
              /// # Errors
              ///
              /// Returns `PatternError::UnsupportedSyntaxInFile` (with the 1-based line
              /// number) when the prefix in effect for a line is rejected by
              /// `parse_pattern_syntax`.
              pub fn parse_pattern_file_contents(
                  lines: &[u8],
                  file_path: &Path,
                  default_syntax_override: Option<&[u8]>,
                  warn: bool,
              ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
                  // Matches from a `#` that is not escaped by a backslash up to the end
                  // of the line; group 1 is the text that must be kept in front of it.
                  let comment_regex = Regex::new(r"((?:^|[^\\])(?:\\\\)*)#.*").unwrap();
                  #[allow(clippy::trivial_regex)]
                  let comment_escape_regex = Regex::new(r"\\#").unwrap();
                  let mut inputs: Vec<IgnorePattern> = vec![];
                  let mut warnings: Vec<PatternFileWarning> = vec![];
                  let mut current_syntax =
                      default_syntax_override.unwrap_or(b"relre:".as_ref());
                  for (line_number, mut line) in lines.split(|c| *c == b'\n').enumerate() {
                      // Line numbers are reported 1-based.
                      let line_number = line_number + 1;
                      // Owns the unescaped copy of the line, when one is needed.
                      let line_buf;
                      if line.contains(&b'#') {
                          // Cut the comment off, then turn every `\#` into `#`.
                          if let Some(cap) = comment_regex.captures(line) {
                              line = &line[..cap.get(1).unwrap().end()]
                          }
                          line_buf = comment_escape_regex.replace_all(line, NoExpand(b"#"));
                          line = &line_buf;
                      }
                      let mut line = line.trim_end();
                      if line.is_empty() {
                          continue;
                      }
                      // A `syntax: name` line changes the default for the lines after
                      // it and never produces a pattern itself.
                      if let Some(syntax) = line.drop_prefix(b"syntax:") {
                          let syntax = syntax.trim();
                          if let Some(rel_syntax) = SYNTAXES.get(syntax) {
                              current_syntax = rel_syntax;
                          } else if warn {
                              warnings.push(PatternFileWarning::InvalidSyntax(
                                  file_path.to_owned(),
                                  syntax.to_owned(),
                              ));
                          }
                          continue;
                      }
                      // A line may start with its own prefix, spelled either as a
                      // `SYNTAXES` value (e.g. `relglob:`) or as a `SYNTAXES` key
                      // followed by `:` (e.g. `glob:`); both select the value and the
                      // prefix is stripped from the pattern.
                      let mut line_syntax: &[u8] = &current_syntax;
                      for (s, rels) in SYNTAXES.iter() {
                          if let Some(rest) = line.drop_prefix(rels) {
                              line_syntax = rels;
                              line = rest;
                              break;
                          }
                          if let Some(rest) = line.drop_prefix(&[s, &b":"[..]].concat()) {
                              line_syntax = rels;
                              line = rest;
                              break;
                          }
                      }
                      inputs.push(IgnorePattern::new(
                          // Add the file and line to an unsupported-syntax error.
                          parse_pattern_syntax(&line_syntax).map_err(|e| match e {
                              PatternError::UnsupportedSyntax(syntax) => {
                                  PatternError::UnsupportedSyntaxInFile(
                                      syntax,
                                      file_path.to_string_lossy().into(),
                                      line_number,
                                  )
                              }
                              _ => e,
                          })?,
                          &line,
                          file_path,
                      ));
                  }
                  Ok((inputs, warnings))
              }
              /// Reads `file_path` and parses it with `parse_pattern_file_contents`,
              /// using the default syntax.
              ///
              /// `inspect_pattern_bytes` is called with the path and the raw contents
              /// before parsing. A missing file is not an error: it yields no patterns
              /// and a single `PatternFileWarning::NoSuchFile` warning.
              ///
              /// # Errors
              ///
              /// Any other I/O error is converted into a `PatternError`; parse errors
              /// are passed through.
              pub fn read_pattern_file(
                  file_path: &Path,
                  warn: bool,
                  inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
              ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
                  let contents = match std::fs::read(file_path) {
                      Ok(bytes) => bytes,
                      Err(err) => {
                          return if err.kind() == std::io::ErrorKind::NotFound {
                              let missing =
                                  PatternFileWarning::NoSuchFile(file_path.to_owned());
                              Ok((vec![], vec![missing]))
                          } else {
                              Err(err.into())
                          };
                      }
                  };
                  inspect_pattern_bytes(file_path, &contents);
                  parse_pattern_file_contents(&contents, file_path, None, warn)
              }
              /// Represents an entry in an "ignore" file.
              #[derive(Debug, Eq, PartialEq, Clone)]
              pub struct IgnorePattern {
                  /// How `pattern` is to be interpreted
                  pub syntax: PatternSyntax,
                  /// The pattern bytes, without any syntax prefix
                  pub pattern: Vec<u8>,
                  /// Path of the file this entry was read from
                  pub source: PathBuf,
              }
              impl IgnorePattern {
                  /// Creates an entry, copying `pattern` and `source` into owned values.
                  pub fn new(syntax: PatternSyntax, pattern: &[u8], source: &Path) -> Self {
                      let pattern = pattern.to_vec();
                      let source = source.to_path_buf();
                      Self {
                          syntax,
                          pattern,
                          source,
                      }
                  }
              }
              /// Shorthand for a `Result` whose error type is `PatternError`.
              pub type PatternResult<T> = Result<T, PatternError>;
              /// Wrapper for `read_pattern_file` that also recursively expands `include:`
              /// and `subinclude:` patterns.
              ///
              /// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
              /// is used for the latter to form a tree of patterns.
              ///
              /// `include:` paths are resolved against `root_dir`. Warnings from nested
              /// files are appended to the warnings of the top-level file.
              ///
              /// # Errors
              ///
              /// Returns the error of `read_pattern_file` for `pattern_file` itself.
              pub fn get_patterns_from_file(
                  pattern_file: &Path,
                  root_dir: &Path,
                  inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
              ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
                  let (patterns, mut warnings) =
                      read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
                  // NOTE(review): the closure below returns a `PatternResult`, and
                  // `flat_map` iterates over it. Iterating a `Result` yields nothing
                  // for `Err`, so an error raised while expanding a nested file looks
                  // like it is dropped silently rather than propagated — confirm that
                  // this is intended.
                  let patterns = patterns
                      .into_iter()
                      .flat_map(|entry| -> PatternResult<_> {
                          Ok(match &entry.syntax {
                              PatternSyntax::Include => {
                                  // Replace the entry by the patterns of the included
                                  // file.
                                  let inner_include =
                                      root_dir.join(get_path_from_bytes(&entry.pattern));
                                  let (inner_pats, inner_warnings) = get_patterns_from_file(
                                      &inner_include,
                                      root_dir,
                                      inspect_pattern_bytes,
                                  )?;
                                  warnings.extend(inner_warnings);
                                  inner_pats
                              }
                              PatternSyntax::SubInclude => {
                                  // Keep a single entry that carries the patterns of
                                  // the sub-included file, read relative to its own
                                  // root.
                                  let mut sub_include = SubInclude::new(
                                      &root_dir,
                                      &entry.pattern,
                                      &entry.source,
                                  )?;
                                  let (inner_patterns, inner_warnings) =
                                      get_patterns_from_file(
                                          &sub_include.path,
                                          &sub_include.root,
                                          inspect_pattern_bytes,
                                      )?;
                                  sub_include.included_patterns = inner_patterns;
                                  warnings.extend(inner_warnings);
                                  vec![IgnorePattern {
                                      syntax: PatternSyntax::ExpandedSubInclude(Box::new(
                                          sub_include,
                                      )),
                                      ..entry
                                  }]
                              }
                              _ => vec![entry],
                          })
                      })
                      // Flatten the per-entry vectors into one list.
                      .flatten()
                      .collect();
                  Ok((patterns, warnings))
              }
              /// Holds all the information needed to handle a `subinclude:` pattern.
              #[derive(Debug, PartialEq, Eq, Clone)]
              pub struct SubInclude {
                  /// Will be used for repository (hg) paths that start with this prefix.
                  /// It is relative to the current working directory, so comparing against
                  /// repository paths is painless.
                  pub prefix: HgPathBuf,
                  /// The file itself, containing the patterns
                  pub path: PathBuf,
                  /// Folder in the filesystem where it applies
                  pub root: PathBuf,
                  /// Patterns read from `path`; empty until the file has been parsed
                  pub included_patterns: Vec<IgnorePattern>,
              }
              impl SubInclude {
                  /// Resolves a `subinclude:` entry.
                  ///
                  /// - `root_dir`: directory against which the prefix is computed.
                  /// - `pattern`: the path written in the `subinclude:` entry; it is
                  ///   joined onto the directory of `source`.
                  /// - `source`: the file that contains the `subinclude:` entry.
                  ///
                  /// The returned value has an empty `included_patterns`; the caller is
                  /// expected to fill it in after reading `path`.
                  ///
                  /// # Errors
                  ///
                  /// Returns an `HgPathError` when the prefix cannot be computed or
                  /// converted to an hg path.
                  pub fn new(
                      root_dir: &Path,
                      pattern: &[u8],
                      source: &Path,
                  ) -> Result<SubInclude, HgPathError> {
                      let normalized_source =
                          normalize_path_bytes(&get_bytes_from_path(source));
                      let source_root = get_path_from_bytes(&normalized_source);
                      // Directory of the source file (or the path itself when it has
                      // no parent).
                      let source_root =
                          source_root.parent().unwrap_or_else(|| source_root.deref());
                      let path = source_root.join(get_path_from_bytes(pattern));
                      let new_root = path.parent().unwrap_or_else(|| path.deref());
                      let prefix = canonical_path(root_dir, root_dir, new_root)?;
                      // A non-empty prefix gets a trailing `/` appended.
                      let prefix = path_to_hg_path_buf(prefix).map(|mut p| {
                          if !p.is_empty() {
                              p.push_byte(b'/');
                          }
                          p
                      })?;
                      // Copy the root out first so that `path` can be moved, not cloned.
                      let root = new_root.to_owned();
                      Ok(Self {
                          prefix,
                          path,
                          root,
                          included_patterns: Vec::new(),
                      })
                  }
              }
              /// Splits `ignore_patterns` for the "ignore" phase: the payloads of
              /// `ExpandedSubInclude` entries go into the first list, every other entry
              /// is kept whole in the second list. Relative order is preserved in both.
              pub fn filter_subincludes(
                  ignore_patterns: Vec<IgnorePattern>,
              ) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
                  let mut subincludes = Vec::new();
                  let mut others = Vec::new();
                  for pattern in ignore_patterns {
                      match pattern.syntax {
                          PatternSyntax::ExpandedSubInclude(sub_include) => {
                              subincludes.push(sub_include)
                          }
                          _ => others.push(pattern),
                      }
                  }
                  Ok((subincludes, others))
              }
              #[cfg(test)]
              mod tests {
                  use super::*;
                  use pretty_assertions::assert_eq;
                  #[test]
                  fn escape_pattern_test() {
                      // Bytes that must come out unchanged.
                      let passthrough: &[u8] =
                          br#"!"%',/0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ_`abcdefghijklmnopqrstuvwxyz"#;
                      assert_eq!(escape_pattern(passthrough), passthrough.to_vec());
                      // All escape codes
                      let special: &[u8] = br#"()[]{}?*+-|^$\\.&~# \t\n\r\v\f"#;
                      let expected: &[u8] =
                          br#"\(\)\[\]\{\}\?\*\+\-\|\^\$\\\\\.\&\~\#\ \\t\\n\\r\\v\\f"#;
                      assert_eq!(escape_pattern(special), expected.to_vec());
                  }
                  #[test]
                  fn glob_test() {
                      // (glob, expected regex) pairs
                      let cases: &[(&[u8], &[u8])] = &[
                          (br#"?"#, br#"."#),
                          (br#"*"#, br#"[^/]*"#),
                          (br#"**"#, br#".*"#),
                          (br#"**/a"#, br#"(?:.*/)?a"#),
                          (br#"a/**/b"#, br#"a/(?:.*/)?b"#),
                          (br#"[a*?!^][^b][!c]"#, br#"[a*?!^][\^b][^c]"#),
                          (br#"{a,b}"#, br#"(?:a|b)"#),
                          (br#".\*\?"#, br#"\.\*\?"#),
                      ];
                      for (glob, expected) in cases {
                          assert_eq!(glob_to_re(glob), expected.to_vec());
                      }
                  }
                  #[test]
                  fn test_parse_pattern_file_contents() {
                      /// Parses `lines` as the contents of `file_path`, without
                      /// warnings, and returns the patterns only.
                      fn parse(lines: &[u8]) -> Vec<IgnorePattern> {
                          parse_pattern_file_contents(
                              lines,
                              Path::new("file_path"),
                              None,
                              false,
                          )
                          .unwrap()
                          .0
                      }

                      // A `syntax:` line applies to the lines after it.
                      assert_eq!(
                          parse(b"syntax: glob\n*.elc"),
                          vec![IgnorePattern::new(
                              PatternSyntax::RelGlob,
                              b"*.elc",
                              Path::new("file_path")
                          )],
                      );
                      // `syntax:` lines alone produce no pattern.
                      assert_eq!(parse(b"syntax: include\nsyntax: glob"), vec![]);
                      // A per-line prefix is stripped from the pattern.
                      assert_eq!(
                          parse(b"glob:**.o"),
                          vec![IgnorePattern::new(
                              PatternSyntax::RelGlob,
                              b"**.o",
                              Path::new("file_path")
                          )]
                      );
                  }
                  #[test]
                  fn test_build_single_regex() {
                      /// Runs `build_single_regex` on a pattern with an empty source.
                      fn regex_for(syntax: PatternSyntax, pattern: &[u8]) -> Option<Vec<u8>> {
                          let entry = IgnorePattern::new(syntax, pattern, Path::new(""));
                          build_single_regex(&entry).unwrap()
                      }

                      assert_eq!(
                          regex_for(PatternSyntax::RelGlob, b"rust/target/"),
                          Some(br"(?:.*/)?rust/target(?:/|$)".to_vec()),
                      );
                      assert_eq!(
                          regex_for(PatternSyntax::Regexp, br"rust/target/\d+"),
                          Some(br"rust/target/\d+".to_vec()),
                      );
                  }
                  #[test]
                  fn test_build_single_regex_shortcut() {
                      /// Runs `build_single_regex` on a `RootGlob` pattern with an empty
                      /// source.
                      fn rootglob(pattern: &[u8]) -> Option<Vec<u8>> {
                          let entry = IgnorePattern::new(
                              PatternSyntax::RootGlob,
                              pattern,
                              Path::new(""),
                          );
                          build_single_regex(&entry).unwrap()
                      }

                      // Globs without special characters need no regex.
                      assert_eq!(rootglob(b""), None);
                      assert_eq!(rootglob(b"whatever"), None);
                      // Any special character forces a regex.
                      assert_eq!(rootglob(b"*.o"), Some(br"[^/]*\.o(?:/|$)".to_vec()));
                  }
+                 #[test]
+                 fn test_build_single_relregex() { // `RelRegexp` patterns and the regex bytes expected for each
+                     assert_eq!(
+                         build_single_regex(&IgnorePattern::new(
+                             PatternSyntax::RelRegexp,
+                             b"^ba{2}r",
+                             Path::new("")
+                         ))
+                         .unwrap(),
+                         Some(b"^ba{2}r".to_vec()), // leading `^`: expected unchanged
+                     );
+                     assert_eq!(
+                         build_single_regex(&IgnorePattern::new(
+                             PatternSyntax::RelRegexp,
+                             b"ba{2}r",
+                             Path::new("")
+                         ))
+                         .unwrap(),
+                         Some(b".*ba{2}r".to_vec()), // no leading `^`: expected with a `.*` prefix
+                     );
+                     assert_eq!(
+                         build_single_regex(&IgnorePattern::new(
+                             PatternSyntax::RelRegexp,
+                             b"(?ia)ba{2}r",
+                             Path::new("")
+                         ))
+                         .unwrap(),
+                         Some(b"(?ia:.*ba{2}r)".to_vec()), // `(?ia)` prefix: expected as a `(?ia:...)` group wrapping `.*` + the rest
+                     );
+                 }
              }

tests/test-hgignore.t

0 +1 -2

              #testcases dirstate-v1 dirstate-v2
              #if dirstate-v2
                $ cat >> $HGRCPATH << EOF
                > [format]
                > use-dirstate-v2=1
                > [storage]
                > dirstate-v2.slow-path=allow
                > EOF
              #endif
                $ hg init ignorerepo
                $ cd ignorerepo
              debugignore with no hgignore should be deterministic:
                $ hg debugignore
                <nevermatcher>
              Issue562: .hgignore requires newline at end:
                $ touch foo
                $ touch bar
                $ touch baz
                $ cat > makeignore.py <<EOF
                > f = open(".hgignore", "w")
                > f.write("ignore\n")
                > f.write("foo\n")
                > # No EOL here
                > f.write("bar")
                > f.close()
                > EOF
                $ "$PYTHON" makeignore.py
              Should display baz only:
                $ hg status
                ? baz
                $ rm foo bar baz .hgignore makeignore.py
                $ touch a.o
                $ touch a.c
                $ touch syntax
                $ mkdir dir
                $ touch dir/a.o
                $ touch dir/b.o
                $ touch dir/c.o
                $ hg add dir/a.o
                $ hg commit -m 0
                $ hg add dir/b.o
                $ hg status
                A dir/b.o
                ? a.c
                ? a.o
                ? dir/c.o
                ? syntax
                $ echo "*.o" > .hgignore
                $ hg status
                abort: $TESTTMP/ignorerepo/.hgignore: invalid pattern (relre): *.o (glob)
                [255]
              Test relre with flags (issue6759)
              ---------------------------------
              regexp with flag is the first one
                $ echo 're:(?i)\.O$' > .hgignore
                $ echo 're:.hgignore' >> .hgignore
                $ hg status
                A dir/b.o
                ? a.c
                ? syntax
              regex with flag is not the first one
                $ echo 're:.hgignore' > .hgignore
                $ echo 're:(?i)\.O$' >> .hgignore
                $ hg status
                A dir/b.o
                ? a.c
                ? syntax
              flag in a pattern should affect that pattern only
                $ echo 're:(?i)\.O$' > .hgignore
                $ echo 're:.HGIGNORE' >> .hgignore
                $ hg status
                A dir/b.o
-               ? .hgignore (no-rust !)
-               ? .hgignore (rust missing-correct-output !)
+               ? .hgignore
                ? a.c
                ? syntax
                $ echo 're:.HGIGNORE' > .hgignore
                $ echo 're:(?i)\.O$' >> .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? syntax
              further testing
              ---------------
                $ echo 're:^(?!a).*\.o$' > .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? a.o
                ? syntax
              #if rhg
                $ hg status --config rhg.on-unsupported=abort
                unsupported feature: Unsupported syntax regex parse error:
                    ^(?:^(?!a).*\.o$)
                         ^^^
                error: look-around, including look-ahead and look-behind, is not supported
                [252]
              #endif
              Ensure given files are relative to cwd
                $ echo "dir/.*\.o" > .hgignore
                $ hg status -i
                I dir/c.o
                $ hg debugignore dir/c.o dir/missing.o
                dir/c.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
                dir/missing.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
                $ cd dir
                $ hg debugignore c.o missing.o
                c.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
                missing.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
              For icasefs, inexact matches also work, except for missing files
              #if icasefs
                $ hg debugignore c.O missing.O
                c.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: 'dir/.*\.o') (glob)
                missing.O is not ignored
              #endif
                $ cd ..
                $ echo ".*\.o" > .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? syntax
              Ensure that comments work:
                $ touch 'foo#bar' 'quux#' 'quu0#'
              #if no-windows
                $ touch 'baz\' 'baz\wat' 'ba0\#wat' 'ba1\\' 'ba1\\wat' 'quu0\'
              #endif
                $ cat <<'EOF' >> .hgignore
                > # full-line comment
                >   # whitespace-only comment line
                > syntax# pattern, no whitespace, then comment
                > a.c  # pattern, then whitespace, then comment
                > baz\\# # (escaped) backslash, then comment
                > ba0\\\#w # (escaped) backslash, escaped comment character, then comment
                > ba1\\\\# # (escaped) backslashes, then comment
                > foo\#b # escaped comment character
                > quux\## escaped comment character at end of name
                > EOF
                $ hg status
                A dir/b.o
                ? .hgignore
                ? quu0#
                ? quu0\ (no-windows !)
                $ cat <<'EOF' > .hgignore
                > .*\.o
                > syntax: glob
                > syntax# pattern, no whitespace, then comment
                > a.c  # pattern, then whitespace, then comment
                > baz\\#* # (escaped) backslash, then comment
                > ba0\\\#w* # (escaped) backslash, escaped comment character, then comment
                > ba1\\\\#* # (escaped) backslashes, then comment
                > foo\#b* # escaped comment character
                > quux\## escaped comment character at end of name
                > quu0[\#]# escaped comment character inside [...]
                > EOF
                $ hg status
                A dir/b.o
                ? .hgignore
                ? ba1\\wat (no-windows !)
                ? baz\wat (no-windows !)
                ? quu0\ (no-windows !)
                $ rm 'foo#bar' 'quux#' 'quu0#'
              #if no-windows
                $ rm 'baz\' 'baz\wat' 'ba0\#wat' 'ba1\\' 'ba1\\wat' 'quu0\'
              #endif
              Check that '^\.' does not ignore the root directory:
                $ echo "^\." > .hgignore
                $ hg status
                A dir/b.o
                ? a.c
                ? a.o
                ? dir/c.o
                ? syntax
              Test that patterns from ui.ignore options are read:
                $ echo > .hgignore
                $ cat >> $HGRCPATH << EOF
                > [ui]
                > ignore.other = $TESTTMP/ignorerepo/.hg/testhgignore
                > EOF
                $ echo "glob:**.o" > .hg/testhgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? syntax
              empty out testhgignore
                $ echo > .hg/testhgignore
              Test relative ignore path (issue4473):
                $ cat >> $HGRCPATH << EOF
                > [ui]
                > ignore.relative = .hg/testhgignorerel
                > EOF
                $ echo "glob:*.o" > .hg/testhgignorerel
                $ cd dir
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? syntax
                $ hg debugignore
                <includematcher includes='.*\\.o(?:/|$)'>
                $ cd ..
                $ echo > .hg/testhgignorerel
                $ echo "syntax: glob" > .hgignore
                $ echo "re:.*\.o" >> .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? syntax
                $ echo "syntax: invalid" > .hgignore
                $ hg status
                $TESTTMP/ignorerepo/.hgignore: ignoring invalid syntax 'invalid'
                A dir/b.o
                ? .hgignore
                ? a.c
                ? a.o
                ? dir/c.o
                ? syntax
                $ echo "syntax: glob" > .hgignore
                $ echo "*.o" >> .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? syntax
                $ echo "relglob:syntax*" > .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? a.o
                ? dir/c.o
                $ echo "relglob:*" > .hgignore
                $ hg status
                A dir/b.o
                $ cd dir
                $ hg status .
                A b.o
                $ hg debugignore
                <includematcher includes='.*(?:/|$)'>
                $ hg debugignore b.o
                b.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 1: '*') (glob)
                $ cd ..
              Check patterns that match only the directory
              "(fsmonitor !)" below assumes that fsmonitor is enabled with
              "walk_on_invalidate = false" (default), which doesn't involve
              re-walking whole repository at detection of .hgignore change.
                $ echo "^dir\$" > .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? a.o
                ? dir/c.o (fsmonitor !)
                ? syntax
              Check recursive glob pattern matches no directories (dir/**/c.o matches dir/c.o)
                $ echo "syntax: glob" > .hgignore
                $ echo "dir/**/c.o" >> .hgignore
                $ touch dir/c.o
                $ mkdir dir/subdir
                $ touch dir/subdir/c.o
                $ hg status
                A dir/b.o
                ? .hgignore
                ? a.c
                ? a.o
                ? syntax
                $ hg debugignore a.c
                a.c is not ignored
                $ hg debugignore dir/c.o
                dir/c.o is ignored
                (ignore rule in $TESTTMP/ignorerepo/.hgignore, line 2: 'dir/**/c.o') (glob)
              Check rooted globs
                $ hg purge --all --config extensions.purge=
                $ echo "syntax: rootglob" > .hgignore
                $ echo "a/*.ext" >> .hgignore
                $ for p in a b/a aa; do mkdir -p $p; touch $p/b.ext; done
                $ hg status -A 'set:**.ext'
                ? aa/b.ext
                ? b/a/b.ext
                I a/b.ext
              Check using 'include:' in ignore file
                $ hg purge --all --config extensions.purge=
                $ touch foo.included
                $ echo ".*.included" > otherignore
                $ hg status -I "include:otherignore"
                ? foo.included
                $ echo "include:otherignore" >> .hgignore
                $ hg status
                A dir/b.o
                ? .hgignore
                ? otherignore
              Check recursive uses of 'include:'
                $ echo "include:nested/ignore" >> otherignore
                $ mkdir nested nested/more
                $ echo "glob:*ignore" > nested/ignore
                $ echo "rootglob:a" >> nested/ignore
                $ touch a nested/a nested/more/a
                $ hg status
                A dir/b.o
                ? nested/a
                ? nested/more/a
                $ rm a nested/a nested/more/a
                $ cp otherignore goodignore
                $ echo "include:badignore" >> otherignore
                $ hg status
                skipping unreadable pattern file 'badignore': $ENOENT$
                A dir/b.o
                $ mv goodignore otherignore
              Check using 'include:' while in a non-root directory
                $ cd ..
                $ hg -R ignorerepo status
                A dir/b.o
                $ cd ignorerepo
              Check including subincludes
                $ hg revert -q --all
                $ hg purge --all --config extensions.purge=
                $ echo ".hgignore" > .hgignore
                $ mkdir dir1 dir2
                $ touch dir1/file1 dir1/file2 dir2/file1 dir2/file2
                $ echo "subinclude:dir2/.hgignore" >> .hgignore
                $ echo "glob:file*2" > dir2/.hgignore
                $ hg status
                ? dir1/file1
                ? dir1/file2
                ? dir2/file1
              Check including subincludes with other patterns
                $ echo "subinclude:dir1/.hgignore" >> .hgignore
                $ mkdir dir1/subdir
                $ touch dir1/subdir/file1
                $ echo "rootglob:f?le1" > dir1/.hgignore
                $ hg status
                ? dir1/file2
                ? dir1/subdir/file1
                ? dir2/file1
                $ rm dir1/subdir/file1
                $ echo "regexp:f.le1" > dir1/.hgignore
                $ hg status
                ? dir1/file2
                ? dir2/file1
              Check multiple levels of sub-ignores
                $ touch dir1/subdir/subfile1 dir1/subdir/subfile3 dir1/subdir/subfile4
                $ echo "subinclude:subdir/.hgignore" >> dir1/.hgignore
                $ echo "glob:subfil*3" >> dir1/subdir/.hgignore
                $ hg status
                ? dir1/file2
                ? dir1/subdir/subfile4
                ? dir2/file1
              Check include subignore at the same level
                $ mv dir1/subdir/.hgignore dir1/.hgignoretwo
                $ echo "regexp:f.le1" > dir1/.hgignore
                $ echo "subinclude:.hgignoretwo" >> dir1/.hgignore
                $ echo "glob:file*2" > dir1/.hgignoretwo
                $ hg status | grep file2
                [1]
                $ hg debugignore dir1/file2
                dir1/file2 is ignored
                (ignore rule in dir2/.hgignore, line 1: 'file*2')
              #if windows
              Windows paths are accepted on input
                $ rm dir1/.hgignore
                $ echo "dir1/file*" >> .hgignore
                $ hg debugignore "dir1\file2"
                dir1/file2 is ignored
                (ignore rule in $TESTTMP\ignorerepo\.hgignore, line 4: 'dir1/file*')
                $ hg up -qC .
              #endif
              #if dirstate-v2 rust
              Check the hash of ignore patterns written in the dirstate
              This is an optimization that is only relevant when using the Rust extensions
                $ cat_filename_and_hash () {
                >     for i in "$@"; do
                >         printf "$i "
                >         cat "$i" | "$TESTDIR"/f --raw-sha1 | sed 's/^raw-sha1=//'
                >     done
                > }
                $ hg status > /dev/null
                $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
                sha1=c0beb296395d48ced8e14f39009c4ea6e409bfe6
                $ hg debugstate --docket | grep ignore
                ignore pattern hash: c0beb296395d48ced8e14f39009c4ea6e409bfe6
                $ echo rel > .hg/testhgignorerel
                $ hg status > /dev/null
                $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
                sha1=b8e63d3428ec38abc68baa27631516d5ec46b7fa
                $ hg debugstate --docket | grep ignore
                ignore pattern hash: b8e63d3428ec38abc68baa27631516d5ec46b7fa
                $ cd ..
              Check that the hash depends on the source of the hgignore patterns
              (otherwise the context is lost and things like subinclude are cached improperly)
                $ hg init ignore-collision
                $ cd ignore-collision
                $ echo > .hg/testhgignorerel
                $ mkdir dir1/ dir1/subdir
                $ touch dir1/subdir/f dir1/subdir/ignored1
                $ echo 'ignored1' > dir1/.hgignore
                $ mkdir dir2 dir2/subdir
                $ touch dir2/subdir/f dir2/subdir/ignored2
                $ echo 'ignored2' > dir2/.hgignore
                $ echo 'subinclude:dir2/.hgignore' >> .hgignore
                $ echo 'subinclude:dir1/.hgignore' >> .hgignore
                $ hg commit -Aqm_
                $ > dir1/.hgignore
                $ echo 'ignored' > dir2/.hgignore
                $ echo 'ignored1' >> dir2/.hgignore
                $ hg status
                M dir1/.hgignore
                M dir2/.hgignore
                ? dir1/subdir/ignored1
              #endif

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages