upstream/mercurial-mirror Commit - r13218:1f4721de

match: support reading pattern lists from files

Steve Borho -

r13218:1f4721de default

parent child

mercurial/help/patterns.txt

0 +10 0

              Mercurial accepts several notations for identifying one or more files
              at a time.
              By default, Mercurial treats filenames as shell-style extended glob
              patterns.
              Alternate pattern notations must be specified explicitly.
              To use a plain path name without any pattern matching, start it with
              ``path:``. These path names must completely match starting at the
              current repository root.
              To use an extended glob, start a name with ``glob:``. Globs are rooted
              at the current directory; a glob such as ``*.c`` will only match files
              in the current directory ending with ``.c``.
              The supported glob syntax extensions are ``**`` to match any string
              across path separators and ``{a,b}`` to mean "a or b".
              To use a Perl/Python regular expression, start a name with ``re:``.
              Regexp pattern matching is anchored at the root of the repository.
+             To read name patterns from a file, use ``listfile:`` or ``listfile0:``.
+             The former expects one pattern per line, while the latter expects
+             patterns delimited by null bytes. Each string read from the file is
+             itself treated as a file pattern.
              Plain examples::
                path:foo/bar   a name bar in a directory named foo in the root
                               of the repository
                path:path:name a file or directory named "path:name"
              Glob examples::
                glob:*.c       any name ending in ".c" in the current directory
                *.c            any name ending in ".c" in the current directory
                **.c           any name ending in ".c" in any subdirectory of the
                               current directory including itself.
                foo/*.c        any name ending in ".c" in the directory foo
                foo/**.c       any name ending in ".c" in any subdirectory of foo
                               including itself.
              Regexp examples::
                re:.*\.c$      any name ending in ".c", anywhere in the repository
+             File examples::
+               listfile:list.txt  read list from list.txt with one file pattern per line
+               listfile0:list.txt read list from list.txt with null byte delimiters

mercurial/match.py

0 +11 -1

              # match.py - filename matching
              #
              #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import re
              import util
              from i18n import _
              class match(object):
                  """Callable predicate telling whether a file name is selected.
                  The decision combines the positional patterns with the optional
                  include and exclude pattern lists; the resulting function is stored
                  in ``matchfn`` and invoked by ``__call__``.
                  """
                  def __init__(self, root, cwd, patterns, include=None, exclude=None,
                               default='glob', exact=False, auditor=None):
                      """build an object to match a set of file patterns
                      arguments:
                      root - the canonical root of the tree you're matching against
                      cwd - the current working directory, if relevant
                      patterns - patterns to find
                      include - patterns to include (None or empty means no filter)
                      exclude - patterns to exclude (None or empty means no filter)
                      default - if a pattern in patterns has no explicit type, assume
                                this one
                      exact - patterns are actually literals
                      auditor - passed on to util.canonpath() while normalizing
                                patterns
                      a pattern is one of:
                      'glob:<glob>' - a glob relative to cwd
                      're:<regexp>' - a regular expression
                      'path:<path>' - a path relative to canonroot
                      'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                      'relpath:<path>' - a path relative to cwd
                      'relre:<regexp>' - a regexp that needn't match the start of a name
                      'listfile:<path>' - a file holding one pattern per line
                      'listfile0:<path>' - a file holding null byte delimited patterns
                      '<something>' - a pattern of the specified default type
                      """
                      self._root = root
                      self._cwd = cwd
                      self._files = []
                      self._anypats = bool(include or exclude)
                      # include/exclude always default to the 'glob' kind, and their
                      # regexps may stop at a '/' so that a directory selects its
                      # contents.
                      if include:
                          im = _buildmatch(_normalize(include, 'glob', root, cwd, auditor),
                                           '(?:/|$)')
                      if exclude:
                          em = _buildmatch(_normalize(exclude, 'glob', root, cwd, auditor),
                                           '(?:/|$)')
                      if exact:
                          # literal file names: membership test against self._fmap
                          self._files = patterns
                          pm = self.exact
                      elif patterns:
                          pats = _normalize(patterns, default, root, cwd, auditor)
                          self._files = _roots(pats)
                          self._anypats = self._anypats or _anypats(pats)
                          pm = _buildmatch(pats, '$')
                      # Pick the cheapest function for the combination of matchers
                      # that actually exist; im/em/pm are only bound when needed.
                      if patterns or exact:
                          if include:
                              if exclude:
                                  m = lambda f: im(f) and not em(f) and pm(f)
                              else:
                                  m = lambda f: im(f) and pm(f)
                          else:
                              if exclude:
                                  m = lambda f: not em(f) and pm(f)
                              else:
                                  m = pm
                      else:
                          if include:
                              if exclude:
                                  m = lambda f: im(f) and not em(f)
                              else:
                                  m = im
                          else:
                              if exclude:
                                  m = lambda f: not em(f)
                              else:
                                  # nothing to filter on: every file matches
                                  m = lambda f: True
                      self.matchfn = m
                      self._fmap = set(self._files)
                  def __call__(self, fn):
                      '''return the result of the match function for fn'''
                      return self.matchfn(fn)
                  def __iter__(self):
                      '''iterate over the entries of files()'''
                      for f in self._files:
                          yield f
                  def bad(self, f, msg):
                      '''callback for each explicit file that can't be
                      found/accessed, with an error message
                      '''
                      pass
                  def dir(self, f):
                      # no-op here; presumably a hook for callers to override,
                      # like bad() -- TODO confirm
                      pass
                  def missing(self, f):
                      # no-op here; presumably a hook for callers to override,
                      # like bad() -- TODO confirm
                      pass
                  def exact(self, f):
                      '''return True if f is one of the entries of files()'''
                      return f in self._fmap
                  def rel(self, f):
                      '''return util.pathto(root, cwd, f) for this matcher'''
                      return util.pathto(self._root, self._cwd, f)
                  def files(self):
                      '''return the literal file list (exact mode) or the pattern
                      roots computed from the normalized patterns'''
                      return self._files
                  def anypats(self):
                      '''return a true value if include/exclude were given or any
                      pattern is of a glob or regexp kind'''
                      return self._anypats
              class exact(match):
                  """Matcher that treats the given files as literal names."""
                  def __init__(self, root, cwd, files):
                      # no include/exclude filters; the file list is matched verbatim
                      match.__init__(self, root, cwd, files, exact=True)
              class always(match):
                  """Matcher built from an empty pattern list, so every file matches."""
                  def __init__(self, root, cwd):
                      nopatterns = []
                      match.__init__(self, root, cwd, nopatterns)
              class narrowmatcher(match):
                  """Wrap a matcher so that it can be used from inside a subdirectory.
                  File names handed to the wrapper are relative to the subdirectory;
                  the subdirectory prefix is stripped from, or added back to, the
                  paths exchanged with the wrapped matcher:
                  >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
                  >>> m2 = narrowmatcher('sub', m1)
                  >>> bool(m2('a.txt'))
                  False
                  >>> bool(m2('b.txt'))
                  True
                  >>> bool(m2.matchfn('a.txt'))
                  False
                  >>> bool(m2.matchfn('b.txt'))
                  True
                  >>> m2.files()
                  ['b.txt']
                  >>> m2.exact('b.txt')
                  True
                  >>> m2.rel('b.txt')
                  'b.txt'
                  >>> def bad(f, msg):
                  ...     print "%s: %s" % (f, msg)
                  >>> m1.bad = bad
                  >>> m2.bad('x.txt', 'No such file')
                  sub/x.txt: No such file
                  """
                  def __init__(self, path, matcher):
                      prefix = path + "/"
                      skip = len(prefix)
                      self._matcher = matcher
                      self._path = path
                      self._root = matcher._root
                      self._cwd = matcher._cwd
                      self._anypats = matcher._anypats
                      # keep only the files below path, expressed relative to it
                      self._files = [name[skip:] for name in matcher._files
                                     if name.startswith(prefix)]
                      self._fmap = set(self._files)
                      # re-add the prefix before asking the wrapped matcher
                      self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
                  def bad(self, f, msg):
                      '''forward to the wrapped matcher with the prefix restored'''
                      self._matcher.bad(self._path + "/" + f, msg)
              def patkind(pat):
                  """Return the explicit kind prefix of pat, or None if it has none."""
                  kind, _rest = _patsplit(pat, None)
                  return kind
              def _patsplit(pat, default):
                  """Split a string into an optional pattern kind prefix and the
                  actual pattern.
                  Returns a (kind, pattern) tuple. Only the text before the first
                  ':' is considered, and it counts as a kind only when it is one of
                  the recognized kind names; otherwise the whole string is returned
                  unchanged as the pattern, paired with ``default``."""
                  if ':' in pat:
                      kind, val = pat.split(':', 1)
-                     if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
+                     if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
+                                 'listfile', 'listfile0'):
                          return kind, val
                  return default, pat
              def _globre(pat):
                  "convert a glob pattern into a regexp"
                  i, n = 0, len(pat)
                  res = ''
                  group = 0
                  escape = re.escape
                  def peek():
                      return i < n and pat[i]
                  while i < n:
                      c = pat[i]
                      i += 1
                      if c not in '*?[{},\\':
                          res += escape(c)
                      elif c == '*':
                          if peek() == '*':
                              i += 1
                              res += '.*'
                          else:
                              res += '[^/]*'
                      elif c == '?':
                          res += '.'
                      elif c == '[':
                          j = i
                          if j < n and pat[j] in '!]':
                              j += 1
                          while j < n and pat[j] != ']':
                              j += 1
                          if j >= n:
                              res += '\\['
                          else:
                              stuff = pat[i:j].replace('\\','\\\\')
                              i = j + 1
                              if stuff[0] == '!':
                                  stuff = '^' + stuff[1:]
                              elif stuff[0] == '^':
                                  stuff = '\\' + stuff
                              res = '%s[%s]' % (res, stuff)
                      elif c == '{':
                          group += 1
                          res += '(?:'
                      elif c == '}' and group:
                          res += ')'
                          group -= 1
                      elif c == ',' and group:
                          res += '|'
                      elif c == '\\':
                          p = peek()
                          if p:
                              i += 1
                              res += escape(p)
                          else:
                              res += escape(c)
                      else:
                          res += escape(c)
                  return res
              def _regex(kind, name, tail):
                  '''convert a pattern into a regular expression'''
                  if not name:
                      return ''
                  if kind == 're':
                      return name
                  elif kind == 'path':
                      return '^' + re.escape(name) + '(?:/|$)'
                  elif kind == 'relglob':
                      return '(?:|.*/)' + _globre(name) + tail
                  elif kind == 'relpath':
                      return re.escape(name) + '(?:/|$)'
                  elif kind == 'relre':
                      if name.startswith('^'):
                          return name
                      return '.*' + name
                  return _globre(name) + tail
              def _buildmatch(pats, tail):
                  """build a matching function from a set of patterns"""
                  try:
                      pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
                      if len(pat) > 20000:
                          raise OverflowError()
                      return re.compile(pat).match
                  except OverflowError:
                      # We're using a Python with a tiny regex engine and we
                      # made it explode, so we'll divide the pattern list in two
                      # until it works
                      l = len(pats)
                      if l < 2:
                          raise
                      a, b = _buildmatch(pats[:l//2], tail), _buildmatch(pats[l//2:], tail)
                      return lambda s: a(s) or b(s)
                  except re.error:
                      for k, p in pats:
                          try:
                              re.compile('(?:%s)' % _regex(k, p, tail))
                          except re.error:
                              raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                      raise util.Abort(_("invalid pattern"))
              def _normalize(names, default, root, cwd, auditor):
                  """Turn pattern strings into a list of (kind, name) tuples.
                  Each entry of names is split with _patsplit(), default being the
                  kind used when the entry has no recognized prefix. Depending on
                  the kind:
                  - 'glob' and 'relpath' names go through util.canonpath() with
                    root, cwd and auditor
                  - 'relglob' and 'path' names go through util.normpath()
                  - 'listfile' and 'listfile0' names are opened as files whose
                    content is split on line feeds (listfile) or null bytes
                    (listfile0); empty entries are dropped and the remaining ones
                    are normalized recursively, their results taking the place of
                    the list file entry
                  - any other kind is kept unchanged
                  Raises util.Abort when a list file cannot be read.
                  NOTE(review): the list file name is passed to open() as given,
                  not through util.canonpath(), and the file object is not closed
                  explicitly.
                  """
                  pats = []
                  for kind, name in [_patsplit(p, default) for p in names]:
                      if kind in ('glob', 'relpath'):
                          name = util.canonpath(root, cwd, name, auditor)
                      elif kind in ('relglob', 'path'):
                          name = util.normpath(name)
+                     elif kind in ('listfile', 'listfile0'):
+                         delimiter = kind == 'listfile0' and '\0' or '\n'
+                         try:
+                             files = open(name, 'r').read().split(delimiter)
+                             files = [f for f in files if f]
+                         except EnvironmentError:
+                             raise util.Abort(_("unable to read file list (%s)") % name)
+                         pats += _normalize(files, default, root, cwd, auditor)
+                         continue
                      pats.append((kind, name))
                  return pats
              def _roots(patterns):
                  r = []
                  for kind, name in patterns:
                      if kind == 'glob': # find the non-glob prefix
                          root = []
                          for p in name.split('/'):
                              if '[' in p or '{' in p or '*' in p or '?' in p:
                                  break
                              root.append(p)
                          r.append('/'.join(root) or '.')
                      elif kind in ('relpath', 'path'):
                          r.append(name or '.')
                      elif kind == 'relglob':
                          r.append('.')
                  return r
              def _anypats(patterns):
                  for kind, name in patterns:
                      if kind in ('glob', 're', 'relglob', 'relre'):
                          return True

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages