upstream/mercurial-mirror Commit - r14675:cfc89398

match: introduce basic fileset support

Matt Mackall -

r14675:cfc89398 default

parent child

mercurial/match.py

0 +33 -6

              # match.py - filename matching
              #
              #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import re
-             import scmutil, util
+             import scmutil, util, fileset
              from i18n import _
+             def _expandsets(pats, ctx):
+                 '''convert set: patterns into a list of files in the given context'''
+                 fset = set()
+                 other = []
+                 for kind, expr in pats:
+                     if kind == 'set':
+                         if not ctx:
+                             raise util.Abort("fileset expression with no context")
+                         s = fileset.getfileset(ctx, expr)
+                         fset.update(s)
+                         continue
+                     other.append((kind, expr))
+                 return fset, other
              class match(object):
                  def __init__(self, root, cwd, patterns, include=[], exclude=[],
                               default='glob', exact=False, auditor=None, ctx=None):
                      """build an object to match a set of file patterns
                      arguments:
                      root - the canonical root of the tree you're matching against
                      cwd - the current working directory, if relevant
                      patterns - patterns to find
                      include - patterns to include
                      exclude - patterns to exclude
                      default - if a pattern in names has no explicit type, assume this one
                      exact - patterns are actually literals
                      a pattern is one of:
                      'glob:<glob>' - a glob relative to cwd
                      're:<regexp>' - a regular expression
                      'path:<path>' - a path relative to canonroot
                      'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                      'relpath:<path>' - a path relative to cwd
                      'relre:<regexp>' - a regexp that needn't match the start of a name
+                     'set:<fileset>' - a fileset expression
                      '<something>' - a pattern of the specified default type
                      """
+                     self._ctx = None
                      self._root = root
                      self._cwd = cwd
                      self._files = []
                      self._anypats = bool(include or exclude)
                      self._ctx = ctx
                      if include:
                          pats = _normalize(include, 'glob', root, cwd, auditor)
-                         self.includepat, im = _buildmatch(pats, '(?:/|$)')
+                         self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
                      if exclude:
                          pats = _normalize(exclude, 'glob', root, cwd, auditor)
-                         self.excludepat, em = _buildmatch(pats, '(?:/|$)')
+                         self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
                      if exact:
                          self._files = patterns
                          pm = self.exact
                      elif patterns:
                          pats = _normalize(patterns, default, root, cwd, auditor)
                          self._files = _roots(pats)
                          self._anypats = self._anypats or _anypats(pats)
-                         self.patternspat, pm = _buildmatch(pats, '$')
+                         self.patternspat, pm = _buildmatch(ctx, pats, '$')
                      if patterns or exact:
                          if include:
                              if exclude:
                                  m = lambda f: im(f) and not em(f) and pm(f)
                              else:
                                  m = lambda f: im(f) and pm(f)
                          else:
                              if exclude:
                                  m = lambda f: not em(f) and pm(f)
                              else:
                                  m = pm
                      else:
                          if include:
                              if exclude:
                                  m = lambda f: im(f) and not em(f)
                              else:
                                  m = im
                          else:
                              if exclude:
                                  m = lambda f: not em(f)
                              else:
                                  m = lambda f: True
                      self.matchfn = m
                      self._fmap = set(self._files)
                  def __call__(self, fn):
                      return self.matchfn(fn)
                  def __iter__(self):
                      for f in self._files:
                          yield f
                  def bad(self, f, msg):
                      '''callback for each explicit file that can't be
                      found/accessed, with an error message
                      '''
                      pass
                  def dir(self, f):
                      pass
                  def missing(self, f):
                      pass
                  def exact(self, f):
                      return f in self._fmap
                  def rel(self, f):
                      return util.pathto(self._root, self._cwd, f)
                  def files(self):
                      return self._files
                  def anypats(self):
                      return self._anypats
              class exact(match):
                  def __init__(self, root, cwd, files):
                      match.__init__(self, root, cwd, files, exact = True)
              class always(match):
                  def __init__(self, root, cwd):
                      match.__init__(self, root, cwd, [])
              class narrowmatcher(match):
                  """Adapt a matcher to work on a subdirectory only.
                  The paths are remapped to remove/insert the path as needed:
                  >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
                  >>> m2 = narrowmatcher('sub', m1)
                  >>> bool(m2('a.txt'))
                  False
                  >>> bool(m2('b.txt'))
                  True
                  >>> bool(m2.matchfn('a.txt'))
                  False
                  >>> bool(m2.matchfn('b.txt'))
                  True
                  >>> m2.files()
                  ['b.txt']
                  >>> m2.exact('b.txt')
                  True
                  >>> m2.rel('b.txt')
                  'b.txt'
                  >>> def bad(f, msg):
                  ...     print "%s: %s" % (f, msg)
                  >>> m1.bad = bad
                  >>> m2.bad('x.txt', 'No such file')
                  sub/x.txt: No such file
                  """
                  def __init__(self, path, matcher):
                      self._root = matcher._root
                      self._cwd = matcher._cwd
                      self._path = path
                      self._matcher = matcher
                      self._files = [f[len(path) + 1:] for f in matcher._files
                                     if f.startswith(path + "/")]
                      self._anypats = matcher._anypats
                      self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
                      self._fmap = set(self._files)
                  def bad(self, f, msg):
                      self._matcher.bad(self._path + "/" + f, msg)
              def patkind(pat):
                  return _patsplit(pat, None)[0]
              def _patsplit(pat, default):
                  """Split a string into an optional pattern kind prefix and the
                  actual pattern."""
                  if ':' in pat:
                      kind, val = pat.split(':', 1)
                      if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
-                                 'listfile', 'listfile0'):
+                                 'listfile', 'listfile0', 'set'):
                          return kind, val
                  return default, pat
              def _globre(pat):
                  "convert a glob pattern into a regexp"
                  i, n = 0, len(pat)
                  res = ''
                  group = 0
                  escape = re.escape
                  def peek():
                      return i < n and pat[i]
                  while i < n:
                      c = pat[i]
                      i += 1
                      if c not in '*?[{},\\':
                          res += escape(c)
                      elif c == '*':
                          if peek() == '*':
                              i += 1
                              res += '.*'
                          else:
                              res += '[^/]*'
                      elif c == '?':
                          res += '.'
                      elif c == '[':
                          j = i
                          if j < n and pat[j] in '!]':
                              j += 1
                          while j < n and pat[j] != ']':
                              j += 1
                          if j >= n:
                              res += '\\['
                          else:
                              stuff = pat[i:j].replace('\\','\\\\')
                              i = j + 1
                              if stuff[0] == '!':
                                  stuff = '^' + stuff[1:]
                              elif stuff[0] == '^':
                                  stuff = '\\' + stuff
                              res = '%s[%s]' % (res, stuff)
                      elif c == '{':
                          group += 1
                          res += '(?:'
                      elif c == '}' and group:
                          res += ')'
                          group -= 1
                      elif c == ',' and group:
                          res += '|'
                      elif c == '\\':
                          p = peek()
                          if p:
                              i += 1
                              res += escape(p)
                          else:
                              res += escape(c)
                      else:
                          res += escape(c)
                  return res
              def _regex(kind, name, tail):
                  '''convert a pattern into a regular expression'''
                  if not name:
                      return ''
                  if kind == 're':
                      return name
                  elif kind == 'path':
                      return '^' + re.escape(name) + '(?:/|$)'
                  elif kind == 'relglob':
                      return '(?:|.*/)' + _globre(name) + tail
                  elif kind == 'relpath':
                      return re.escape(name) + '(?:/|$)'
                  elif kind == 'relre':
                      if name.startswith('^'):
                          return name
                      return '.*' + name
                  return _globre(name) + tail
-             def _buildmatch(pats, tail):
+             def _buildmatch(ctx, pats, tail):
+                 fset, pats = _expandsets(pats, ctx)
+                 if not pats:
+                     return "", fset.__contains__
+                 pat, mf = _buildregexmatch(pats, tail)
+                 if fset:
+                     return pat, lambda f: f in fset or mf(f)
+                 return pat, mf
+             def _buildregexmatch(pats, tail):
                  """build a matching function from a set of patterns"""
                  try:
                      pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
                      if len(pat) > 20000:
                          raise OverflowError()
                      return pat, re.compile(pat).match
                  except OverflowError:
                      # We're using a Python with a tiny regex engine and we
                      # made it explode, so we'll divide the pattern list in two
                      # until it works
                      l = len(pats)
                      if l < 2:
                          raise
                      pata, a = _buildmatch(pats[:l//2], tail)
                      patb, b = _buildmatch(pats[l//2:], tail)
                      return pat, lambda s: a(s) or b(s)
                  except re.error:
                      for k, p in pats:
                          try:
                              re.compile('(?:%s)' % _regex(k, p, tail))
                          except re.error:
                              raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                      raise util.Abort(_("invalid pattern"))
              def _normalize(names, default, root, cwd, auditor):
                  pats = []
                  for kind, name in [_patsplit(p, default) for p in names]:
                      if kind in ('glob', 'relpath'):
                          name = scmutil.canonpath(root, cwd, name, auditor)
                      elif kind in ('relglob', 'path'):
                          name = util.normpath(name)
                      elif kind in ('listfile', 'listfile0'):
                          try:
                              files = util.readfile(name)
                              if kind == 'listfile0':
                                  files = files.split('\0')
                              else:
                                  files = files.splitlines()
                              files = [f for f in files if f]
                          except EnvironmentError:
                              raise util.Abort(_("unable to read file list (%s)") % name)
                          pats += _normalize(files, default, root, cwd, auditor)
                          continue
                      pats.append((kind, name))
                  return pats
              def _roots(patterns):
                  r = []
                  for kind, name in patterns:
                      if kind == 'glob': # find the non-glob prefix
                          root = []
                          for p in name.split('/'):
                              if '[' in p or '{' in p or '*' in p or '?' in p:
                                  break
                              root.append(p)
                          r.append('/'.join(root) or '.')
                      elif kind in ('relpath', 'path'):
                          r.append(name or '.')
                      elif kind == 'relglob':
                          r.append('.')
                  return r
              def _anypats(patterns):
                  for kind, name in patterns:
                      if kind in ('glob', 're', 'relglob', 'relre'):
                          return True

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages