##// END OF EJS Templates
match: make glob '**/' match the empty string...
match: make glob '**/' match the empty string. Previously, a glob pattern of the form 'foo/**/bar' would match 'foo/a/bar' but not 'foo/bar'. That was because the '**' in 'foo/**/bar' would be translated to '.*', making the final regex pattern 'foo/.*/bar'. That pattern doesn't match the string 'foo/bar'. This is a bug because the '**/' glob matches the empty string in standard Unix shells like bash and zsh. Fix that by making the ending '/' optional if an empty string can be matched.

File last commit:

r21815:a4b67bf1 stable
r21815:a4b67bf1 stable
Show More
match.py
416 lines | 13.4 KiB | text/x-python | PythonLexer
timeless
Generally replace "file name" with "filename" in help and comments.
r8761 # match.py - filename matching
Martin Geisler
match: add copyright and license header
r8231 #
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
match: add copyright and license header
r8231
Alejandro Santos
split local and stdlib module imports (eases migration issues)
r9036 import re
Augie Fackler
match: use ctx.getfileset() instead of fileset.getfileset()...
r20401 import util, pathutil
Martin Geisler
match: mark error messages for translation
r12133 from i18n import _
Matt Mackall
walk: introduce match objects
r6576
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _rematcher(regex):
    '''Compile the regexp with the best available regexp engine and return a
    matcher function.

    The pattern is compiled with util.compilere().  If the compiled object
    has a test_match attribute, that callable is returned; otherwise the
    object's match method is returned.
    '''
    compiled = util.compilere(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return compiled.test_match
    except AttributeError:
        return compiled.match
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _expandsets(kindpats, ctx):
    '''Return the kindpats list with the 'set' patterns expanded.

    kindpats is an iterable of (kind, pat) tuples.  Each pattern of kind
    'set' is evaluated with ctx.getfileset(pat) and the results are merged
    into one set; every other tuple is kept, in its original order.

    Returns a (fset, other) tuple: the set collected from the 'set'
    patterns and the list of remaining (kind, pat) tuples.

    Raises util.Abort if a 'set' pattern is seen and ctx is false.
    '''
    fset = set()
    other = []
    for kind, pat in kindpats:
        if kind != 'set':
            other.append((kind, pat))
            continue
        if not ctx:
            raise util.Abort("fileset expression with no context")
        fset.update(ctx.getfileset(pat))
    return fset, other
Matt Mackall
match: fold match into _match base class
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """
        # NOTE: the list defaults for include/exclude are only read here,
        # never mutated, so sharing them between calls is harmless.

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # im/em/pm are the matcher functions for include, exclude and
        # patterns; each is only bound when its input is non-empty.
        if include:
            kindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exclude:
            kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exact:
            # a list is stored as-is (not copied); anything else is
            # converted so that files() always returns a list
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$')

        # Combine only the matchers that exist, so no call is wasted on a
        # filter that was never requested.
        if patterns or exact:
            if include and exclude:
                m = lambda f: im(f) and not em(f) and pm(f)
            elif include:
                m = lambda f: im(f) and pm(f)
            elif exclude:
                m = lambda f: not em(f) and pm(f)
            else:
                m = pm
        elif include and exclude:
            m = lambda f: im(f) and not em(f)
        elif include:
            m = im
        elif exclude:
            m = lambda f: not em(f)
        else:
            # nothing restricts the match at all
            m = lambda f: True
            self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
Matt Mackall
match: refactor patkind...
r8568
Matt Mackall
match: add exact flag to match() to unify all match forms
class exact(match):
    '''Matcher built from a list of filenames instead of patterns
    (match with exact=True).'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact=True)
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
class always(match):
    '''Matcher built with an empty pattern list and flagged as always
    matching.'''
    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, [])
        self._always = True
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
Martin Geisler
match: add narrowmatcher class...
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # match.__init__ is not called: all state is copied or derived
        # from the wrapped matcher.
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        self._files = [f[len(prefix):] for f in matcher._files
                       if f.startswith(prefix)]
        self._anypats = matcher._anypats
        # re-add the prefix before asking the wrapped matcher
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        self._fmap = set(self._files)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad(), with the subdirectory
        prefix put back on f.'''
        self._matcher.bad(self._path + "/" + f, msg)
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    return _patsplit(pattern, default)[0]
Matt Mackall
match: move util match functions over
r8570
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns (kind, pat) when the text before the first ':' is a known
    kind; otherwise returns (default, pattern) with pattern unchanged."""
    knownkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                  'listfile', 'listfile0', 'set')
    if ':' in pattern:
        kind, pat = pattern.split(':', 1)
        if kind in knownkinds:
            return kind, pat
    return default, pattern
Matt Mackall
match: move util match functions over
r8570
Matt Mackall
match: remove head and tail args from _globre
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = []    # regexp fragments, joined once at the end
    group = 0   # current nesting depth of '{...}' alternations
    escape = re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            res.append(escape(c))
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' may also match the empty string, so the
                    # trailing slash is made optional with the rest
                    i += 1
                    res.append('(?:.*/)?')
                else:
                    res.append('.*')
            else:
                res.append('[^/]*')
        elif c == '?':
            res.append('.')
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' is part of
            # the class and does not end it
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a literal '^' must not negate the regexp class
                    stuff = '\\' + stuff
                res.append('[%s]' % stuff)
        elif c == '{':
            group += 1
            res.append('(?:')
        elif c == '}' and group:
            res.append(')')
            group -= 1
        elif c == ',' and group:
            res.append('|')
        elif c == '\\':
            p = peek()
            if p:
                # backslash escapes the following character
                i += 1
                res.append(escape(p))
            else:
                res.append(escape(c))
        else:
            # '}' or ',' outside any group: plain literal
            res.append(escape(c))
    return ''.join(res)
Matt Mackall
match: move util match functions over
r8570
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a regular expression.
    globsuffix is appended to the regexp of globs.

    An empty pat yields ''.  The result per kind is:
    're'      - pat unchanged
    'path'    - escaped pat anchored with '^', followed by '(?:/|$)'
    'relglob' - '(?:|.*/)' + glob regexp + globsuffix
    'relpath' - escaped pat followed by '(?:/|$)'
    'relre'   - pat unchanged if it starts with '^', else '.*' + pat
    any other kind is treated as a glob: glob regexp + globsuffix
    '''
    if not pat:
        return ''
    if kind == 're':
        return pat
    if kind == 'path':
        return '^' + re.escape(pat) + '(?:/|$)'
    if kind == 'relglob':
        return '(?:|.*/)' + _globre(pat) + globsuffix
    if kind == 'relpath':
        return re.escape(pat) + '(?:/|$)'
    if kind == 'relre':
        if pat.startswith('^'):
            return pat
        return '.*' + pat
    return _globre(pat) + globsuffix
Matt Mackall
match: unnest functions in _matcher
r8574
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _buildmatch(ctx, kindpats, globsuffix):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.

    'set' patterns are expanded first (using ctx).  If only 'set' patterns
    were given, the regexp string is "" and the matcher is the membership
    test of the expanded set.  Otherwise the matcher accepts a name that
    is in the expanded set or that matches the combined regexp.
    '''
    fset, kindpats = _expandsets(kindpats, ctx)
    if not kindpats:
        return "", fset.__contains__

    regex, mf = _buildregexmatch(kindpats, globsuffix)
    if fset:
        return regex, lambda f: f in fset or mf(f)
    return regex, mf
Matt Mackall
match: introduce basic fileset support
r14675
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    All patterns are joined into one alternation.  If that is longer than
    20000 characters, or compiling it raises OverflowError, the list is
    split in two and each half is built recursively; the returned regexp
    string is still the full alternation.

    Raises util.Abort when a pattern is not a valid regexp, and re-raises
    OverflowError when a single pattern alone is too large."""
    try:
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p) in kindpats])
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        # only the matcher functions of the two halves are needed
        a = _buildregexmatch(kindpats[:l//2], globsuffix)[1]
        b = _buildregexmatch(kindpats[l//2:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile each pattern on its own to report the offending one
        for k, p in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
        raise util.Abort(_("invalid pattern"))
Matt Mackall
match: unnest functions in _matcher
r8574
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _normalize(patterns, default, root, cwd, auditor):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    'glob' and 'relpath' patterns go through pathutil.canonpath();
    'relglob' and 'path' patterns go through util.normpath().  A
    'listfile' / 'listfile0' pattern names a file whose non-empty entries
    (split on line ends or on NUL respectively) are normalized
    recursively and inserted in its place.  Other kinds are kept as is.

    Raises util.Abort if a list file cannot be read.'''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            kindpats += _normalize(files, default, root, cwd, auditor)
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat))
    return kindpats
Matt Mackall
match: unnest functions in _matcher
r8574
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _roots(kindpats):
    '''return roots and exact explicitly listed files from patterns

    One entry is produced per (kind, pat) tuple, in order: the leading
    path components without glob characters for 'glob', the pattern
    itself for 'relpath' and 'path', and '.' for everything else or
    whenever the result would be empty.

    >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
    ['g', 'g', '.']
    >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
    ['r', 'p/p', '.']
    >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
    ['.', '.', '.']
    '''
    r = []
    for kind, pat in kindpats:
        if kind == 'glob': # find the non-glob prefix
            root = []
            for p in pat.split('/'):
                if '[' in p or '{' in p or '*' in p or '?' in p:
                    break
                root.append(p)
            r.append('/'.join(root) or '.')
        elif kind in ('relpath', 'path'):
            r.append(pat or '.')
        else: # relglob, re, relre
            r.append('.')
    return r
Mads Kiilerich
match: improve documentation - docstrings and more descriptive variable naming...
def _anypats(kindpats):
    '''Return True if any (kind, pat) tuple has a kind that is a real
    pattern ('glob', 're', 'relglob', 'relre' or 'set').

    Falls off the end, returning None, when there is none.'''
    for kind, pat in kindpats:
        if kind in ('glob', 're', 'relglob', 'relre', 'set'):
            return True