upstream/mercurial-mirror Commit - r21112:03782d2f

match: _globre doctests

Mads Kiilerich -

r21112:03782d2f default

parent child

mercurial/match.py

0 +15 -1

              # match.py - filename matching
              #
              #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              import re
              import util, pathutil
              from i18n import _
              def _rematcher(regex):
                  '''Compile regex with util.compilere and return a matcher function.

                  The compiled object's test_match attribute is preferred when it is
                  available; otherwise its ordinary match method is returned.'''
                  compiled = util.compilere(regex)
                  try:
                      # slightly faster, provided by facebook's re2 bindings
                      matcher = compiled.test_match
                  except AttributeError:
                      # plain regexp objects only offer match
                      matcher = compiled.match
                  return matcher
              def _expandsets(kindpats, ctx):
                  '''Split kindpats into expanded fileset members and everything else.

                  Returns a (set, list) pair: the set holds every file produced by
                  ctx.getfileset() for the 'set' patterns, the list holds the remaining
                  (kind, pat) tuples in their original order. util.Abort is raised when
                  a 'set' pattern is seen without a usable ctx.'''
                  expanded = set()
                  remaining = []
                  for kind, pat in kindpats:
                      if kind != 'set':
                          remaining.append((kind, pat))
                      elif ctx:
                          expanded.update(ctx.getfileset(pat))
                      else:
                          raise util.Abort("fileset expression with no context")
                  return expanded, remaining
              class match(object):
                  def __init__(self, root, cwd, patterns, include=[], exclude=[],
                               default='glob', exact=False, auditor=None, ctx=None):
                      """Create a matcher for a set of file patterns.

                      arguments:
                      root - the canonical root of the tree you're matching against
                      cwd - the current working directory, if relevant
                      patterns - patterns to find
                      include - patterns to include (unless they are excluded)
                      exclude - patterns to exclude (even if they are included)
                      default - kind assumed for patterns without an explicit kind
                      exact - patterns are actually filenames (include/exclude still apply)

                      a pattern is one of:
                      'glob:<glob>' - a glob relative to cwd
                      're:<regexp>' - a regular expression
                      'path:<path>' - a path relative to repository root
                      'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                      'relpath:<path>' - a path relative to cwd
                      'relre:<regexp>' - a regexp that needn't match the start of a name
                      'set:<fileset>' - a fileset expression
                      '<something>' - a pattern of the specified default type
                      """
                      self._root = root
                      self._cwd = cwd
                      self._files = [] # exact files and roots of patterns
                      self._anypats = bool(include or exclude)
                      self._ctx = ctx
                      self._always = False

                      # include/exclude are always interpreted as globs and match
                      # whole path components (suffix '(?:/|$)')
                      if include:
                          incpats = _normalize(include, 'glob', root, cwd, auditor)
                          self.includepat, incmatch = _buildmatch(ctx, incpats,
                                                                  '(?:/|$)')
                      if exclude:
                          excpats = _normalize(exclude, 'glob', root, cwd, auditor)
                          self.excludepat, excmatch = _buildmatch(ctx, excpats,
                                                                  '(?:/|$)')

                      if exact:
                          # patterns are plain file names; reuse a list as-is
                          if not isinstance(patterns, list):
                              self._files = list(patterns)
                          else:
                              self._files = patterns
                          patmatch = self.exact
                      elif patterns:
                          pats = _normalize(patterns, default, root, cwd, auditor)
                          self._files = _roots(pats)
                          self._anypats = self._anypats or _anypats(pats)
                          self.patternspat, patmatch = _buildmatch(ctx, pats, '$')

                      # pick the cheapest function combining the available matchers
                      byinc = bool(include)
                      byexc = bool(exclude)
                      bypat = bool(patterns or exact)
                      if bypat and byinc and byexc:
                          fn = lambda f: incmatch(f) and not excmatch(f) and patmatch(f)
                      elif bypat and byinc:
                          fn = lambda f: incmatch(f) and patmatch(f)
                      elif bypat and byexc:
                          fn = lambda f: not excmatch(f) and patmatch(f)
                      elif bypat:
                          fn = patmatch
                      elif byinc and byexc:
                          fn = lambda f: incmatch(f) and not excmatch(f)
                      elif byinc:
                          fn = incmatch
                      elif byexc:
                          fn = lambda f: not excmatch(f)
                      else:
                          # nothing restricts the match at all
                          fn = lambda f: True
                          self._always = True

                      self.matchfn = fn
                      self._fmap = set(self._files)

                  def __call__(self, fn):
                      '''Apply the match function to the file name fn.'''
                      return self.matchfn(fn)

                  def __iter__(self):
                      '''Iterate over the entries of .files().'''
                      for name in self._files:
                          yield name

                  # Callbacks related to how the matcher is used by dirstate.walk.
                  # Subscribers to these events must monkeypatch the matcher object.
                  def bad(self, f, msg):
                      '''Callback from dirstate.walk for each explicit file that can't be
                      found/accessed, with an error message. Does nothing by default.'''
                      pass

                  # If an explicitdir is set, it will be called when an explicitly listed
                  # directory is visited.
                  explicitdir = None

                  # If a traversedir is set, it will be called when a directory discovered
                  # by recursive traversal is visited.
                  traversedir = None

                  def missing(self, f):
                      '''Callback that does nothing by default.'''
                      pass

                  def rel(self, f):
                      '''Convert repo path back to path that is relative to cwd of matcher.'''
                      return util.pathto(self._root, self._cwd, f)

                  def files(self):
                      '''Explicitly listed files or patterns or roots:
                      if no patterns or .always(): empty list,
                      if exact: list exact files,
                      if not .anypats(): list all files and dirs,
                      else: optimal roots'''
                      return self._files

                  def exact(self, f):
                      '''Returns True if f is in .files().'''
                      return f in self._fmap

                  def anypats(self):
                      '''Matcher uses patterns or include/exclude.'''
                      return self._anypats

                  def always(self):
                      '''Matcher will match everything and .files() will be empty
                      - optimization might be possible and necessary.'''
                      return self._always
              class exact(match):
                  '''Matcher for an explicit list of file names (no pattern expansion).'''
                  def __init__(self, root, cwd, files):
                      match.__init__(self, root, cwd, patterns=files, exact=True)
              class always(match):
                  '''Matcher built with no patterns at all; .always() reports True.'''
                  def __init__(self, root, cwd):
                      match.__init__(self, root, cwd, patterns=[])
                      # set explicitly rather than relying on the base initializer
                      self._always = True
              class narrowmatcher(match):
                  """Adapt a matcher to work on a subdirectory only.

                  Paths handed to this matcher are relative to the subdirectory; the
                  subdirectory prefix is added before delegating to the wrapped matcher
                  and stripped from the wrapped matcher's file list:

                  >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
                  >>> m2 = narrowmatcher('sub', m1)
                  >>> bool(m2('a.txt'))
                  False
                  >>> bool(m2('b.txt'))
                  True
                  >>> bool(m2.matchfn('a.txt'))
                  False
                  >>> bool(m2.matchfn('b.txt'))
                  True
                  >>> m2.files()
                  ['b.txt']
                  >>> m2.exact('b.txt')
                  True
                  >>> m2.rel('b.txt')
                  'b.txt'
                  >>> def bad(f, msg):
                  ...     print "%s: %s" % (f, msg)
                  >>> m1.bad = bad
                  >>> m2.bad('x.txt', 'No such file')
                  sub/x.txt: No such file
                  """

                  def __init__(self, path, matcher):
                      self._root = matcher._root
                      self._cwd = matcher._cwd
                      self._path = path
                      self._matcher = matcher
                      self._always = matcher._always

                      # keep only the wrapped matcher's files below path, with the
                      # "path/" prefix removed
                      narrowed = []
                      for name in matcher._files:
                          if name.startswith(path + "/"):
                              narrowed.append(name[len(path) + 1:])
                      self._files = narrowed

                      self._anypats = matcher._anypats

                      def prefixed(fn):
                          # re-add the prefix before asking the wrapped matcher
                          return matcher.matchfn(self._path + "/" + fn)
                      self.matchfn = prefixed
                      self._fmap = set(self._files)

                  def bad(self, f, msg):
                      '''Forward the callback to the wrapped matcher with the
                      subdirectory prefix re-added to f.'''
                      self._matcher.bad(self._path + "/" + f, msg)
              def patkind(pattern, default=None):
                  '''Return the kind of pattern: its 'kind:' prefix when that prefix is a
                  known kind, otherwise default.'''
                  kindpat = _patsplit(pattern, default)
                  return kindpat[0]
              def _patsplit(pattern, default):
                  """Split pattern into a (kind, pat) pair.

                  When pattern starts with a known 'kind:' prefix, that kind and the
                  text after the first colon are returned. Otherwise the result is
                  (default, pattern) with the pattern left untouched."""
                  knownkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                                'listfile', 'listfile0', 'set')
                  colon = pattern.find(':')
                  if colon >= 0:
                      kind = pattern[:colon]
                      if kind in knownkinds:
                          return kind, pattern[colon + 1:]
                  return default, pattern
              def _globre(pat):
                  r'''Convert an extended glob string to a regexp string.

                  Translation rules: '?' becomes '.', '*' becomes '[^/]*' (no slash),
                  '**' becomes '.*', '[...]' becomes a character class (a leading '!'
                  negates, a leading '^' is taken literally), '{a,b}' becomes a
                  non-capturing alternation and a backslash quotes the next character.
                  Everything else is passed through re.escape.

                  >>> print _globre(r'?')
                  .
                  >>> print _globre(r'*')
                  [^/]*
                  >>> print _globre(r'**')
                  .*
                  >>> print _globre(r'[a*?!^][^b][!c]')
                  [a*?!^][\^b][^c]
                  >>> print _globre(r'{a,b}')
                  (?:a|b)
                  >>> print _globre(r'.\*\?')
                  \.\*\?
                  '''
                  i, n = 0, len(pat)
                  # pieces of the resulting regexp, joined once at the end
                  res = []
                  # nesting depth of currently open '{' groups
                  group = 0
                  escape = re.escape
                  while i < n:
                      c = pat[i]
                      i += 1
                      if c not in '*?[{},\\':
                          res.append(escape(c))
                      elif c == '*':
                          if i < n and pat[i] == '*':
                              # '**' also crosses directory separators
                              i += 1
                              res.append('.*')
                          else:
                              res.append('[^/]*')
                      elif c == '?':
                          res.append('.')
                      elif c == '[':
                          # find the closing ']'; a ']' (or '!') directly after the
                          # '[' belongs to the class instead of closing it
                          j = i
                          if j < n and pat[j] in '!]':
                              j += 1
                          while j < n and pat[j] != ']':
                              j += 1
                          if j >= n:
                              # unterminated class: treat the '[' literally
                              res.append('\\[')
                          else:
                              stuff = pat[i:j].replace('\\','\\\\')
                              i = j + 1
                              if stuff[0] == '!':
                                  # glob negation -> regexp negation
                                  stuff = '^' + stuff[1:]
                              elif stuff[0] == '^':
                                  # '^' has no special meaning in globs
                                  stuff = '\\' + stuff
                              res.append('[%s]' % stuff)
                      elif c == '{':
                          group += 1
                          res.append('(?:')
                      elif c == '}' and group:
                          res.append(')')
                          group -= 1
                      elif c == ',' and group:
                          res.append('|')
                      elif c == '\\':
                          if i < n:
                              # backslash quotes the following character
                              res.append(escape(pat[i]))
                              i += 1
                          else:
                              # trailing backslash stands for itself
                              res.append(escape(c))
                      else:
                          # '}' or ',' outside of any group
                          res.append(escape(c))
                  return ''.join(res)
              def _regex(kind, pat, globsuffix):
                  '''Translate one (normalized) pattern of the given kind into a regexp
                  string. An empty pattern gives an empty string.

                  globsuffix is appended to regexps produced from globs; kinds other
                  than re, path, relpath, relre and relglob are treated as rooted
                  globs.'''
                  if not pat:
                      return ''
                  # a path matches itself and anything below it
                  dirorend = '(?:/|$)'
                  if kind == 're':
                      regex = pat
                  elif kind == 'path':
                      regex = '^' + re.escape(pat) + dirorend
                  elif kind == 'relpath':
                      regex = re.escape(pat) + dirorend
                  elif kind == 'relre':
                      if pat.startswith('^'):
                          regex = pat
                      else:
                          # unanchored regexp: allow any leading text
                          regex = '.*' + pat
                  elif kind == 'relglob':
                      regex = '(?:|.*/)' + _globre(pat) + globsuffix
                  else:
                      regex = _globre(pat) + globsuffix
                  return regex
              def _buildmatch(ctx, kindpats, globsuffix):
                  '''Build a (regexp string, matcher function) pair for kindpats.

                  'set' patterns are expanded through ctx into a set of file names; the
                  remaining patterns are compiled into one regexp, with globsuffix
                  appended to the regexp of globs. The returned function accepts a
                  name found by either of the two.'''
                  fileset, regexpats = _expandsets(kindpats, ctx)
                  if not regexpats:
                      # only filesets (or nothing at all): plain membership test
                      return "", fileset.__contains__
                  regex, regexmatch = _buildregexmatch(regexpats, globsuffix)
                  if not fileset:
                      return regex, regexmatch
                  return regex, lambda f: f in fileset or regexmatch(f)
              def _buildregexmatch(kindpats, globsuffix):
                  """Build a match function from a list of (kind, pattern) tuples.

                  Returns a (regexp string, matcher function) pair. globsuffix is
                  appended to the regexp of globs.

                  When the combined regexp is longer than 20000 characters, or the
                  regexp engine raises OverflowError, the pattern list is split in two
                  and each half is compiled separately. OverflowError propagates if a
                  single pattern is still too large.

                  Raises util.Abort when a pattern is not a valid regular expression,
                  naming the offending pattern whenever it can be identified.
                  """
                  try:
                      regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                                   for (k, p) in kindpats])
                      if len(regex) > 20000:
                          raise OverflowError
                      return regex, _rematcher(regex)
                  except OverflowError:
                      # We're using a Python with a tiny regex engine and we
                      # made it explode, so we'll divide the pattern list in two
                      # until it works
                      l = len(kindpats)
                      if l < 2:
                          raise
                      regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
                      regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
                      # The complete regexp string is still returned; only the matching
                      # is delegated to the two halves.
                      return regex, lambda s: a(s) or b(s)
                  except re.error:
                      # compile each pattern on its own to find the one to blame
                      for k, p in kindpats:
                          try:
                              _rematcher('(?:%s)' % _regex(k, p, globsuffix))
                          except re.error:
                              raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
                      # every pattern compiles alone, only the combination fails
                      raise util.Abort(_("invalid pattern"))
              def _normalize(patterns, default, root, cwd, auditor):
                  '''Turn a list of 'kind:pat' strings into a list of (kind, pat) tuples.

                  Patterns without a known kind get the default kind. glob and relpath
                  patterns go through pathutil.canonpath, relglob and path patterns
                  through util.normpath, and listfile/listfile0 patterns are replaced
                  by the normalized patterns read from the named file. util.Abort is
                  raised when such a file cannot be read.'''
                  normalized = []
                  splitpats = [_patsplit(p, default) for p in patterns]
                  for kind, pat in splitpats:
                      if kind in ('listfile', 'listfile0'):
                          try:
                              content = util.readfile(pat)
                              if kind == 'listfile0':
                                  # NUL separated entries
                                  names = content.split('\0')
                              else:
                                  # one entry per line
                                  names = content.splitlines()
                              names = [name for name in names if name]
                          except EnvironmentError:
                              raise util.Abort(_("unable to read file list (%s)") % pat)
                          normalized += _normalize(names, default, root, cwd, auditor)
                          continue
                      if kind in ('glob', 'relpath'):
                          pat = pathutil.canonpath(root, cwd, pat, auditor)
                      elif kind in ('relglob', 'path'):
                          pat = util.normpath(pat)
                      # anything else (re, relre, ...) cannot be normalized
                      normalized.append((kind, pat))
                  return normalized
              def _roots(kindpats):
                  '''Return one root or exact explicitly listed file per pattern.

                  For a glob this is the leading run of path components without glob
                  characters, for relpath and path the pattern itself; '.' is used for
                  all other kinds and whenever the result would be empty.

                  >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
                  ['g', 'g', '.']
                  >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
                  ['r', 'p/p', '.']
                  >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
                  ['.', '.', '.']
                  '''
                  def isliteral(component):
                      # True when the path component holds no glob special character
                      for special in '[{*?':
                          if special in component:
                              return False
                      return True

                  roots = []
                  for kind, pat in kindpats:
                      if kind == 'glob':
                          # find the non-glob prefix
                          prefix = []
                          for component in pat.split('/'):
                              if not isliteral(component):
                                  break
                              prefix.append(component)
                          roots.append('/'.join(prefix) or '.')
                      elif kind == 'relpath' or kind == 'path':
                          roots.append(pat or '.')
                      else:
                          # relglob, re, relre
                          roots.append('.')
                  return roots
              def _anypats(kindpats):
                  '''Return True if any entry of kindpats is of a kind that is a real
                  pattern rather than a plain path (glob, re, relglob, relre or set);
                  otherwise return None.'''
                  patternkinds = ('glob', 're', 'relglob', 'relre', 'set')
                  for kindpat in kindpats:
                      kind, pat = kindpat
                      if kind in patternkinds:
                          return True
                  return None

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages