|
|
# match.py - filename matching
#
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
|
|
|
|
|
|
import re
|
|
|
import util, pathutil
|
|
|
from i18n import _
|
|
|
|
|
|
def _rematcher(regex):
    '''Compile regex through util.re and return a callable that matches a
    string against it.

    The compiled object's ``test_match`` attribute is returned when it
    exists; otherwise its ordinary ``match`` method is returned.'''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
|
|
|
|
|
|
def _expandsets(kindpats, ctx):
|
|
|
'''Returns the kindpats list with the 'set' patterns expanded.'''
|
|
|
fset = set()
|
|
|
other = []
|
|
|
|
|
|
for kind, pat in kindpats:
|
|
|
if kind == 'set':
|
|
|
if not ctx:
|
|
|
raise util.Abort("fileset expression with no context")
|
|
|
s = ctx.getfileset(pat)
|
|
|
fset.update(s)
|
|
|
continue
|
|
|
other.append((kind, pat))
|
|
|
return fset, other
|
|
|
|
|
|
class match(object):
    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - handed on to the pattern normalization step
        ctx - context used to expand 'set:' patterns

        include and exclude default to None, which is handled exactly like
        an empty list.

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # im / em / pm are the include, exclude and pattern match functions;
        # each one is only defined when the corresponding input is non-empty
        if include:
            kindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exclude:
            kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exact:
            # a list is stored as-is (not copied); anything else is
            # materialized into a list
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$')

        # every combination of patterns/include/exclude gets a dedicated
        # callable, so only the checks that are actually needed run per file
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # nothing restricts the match at all
                    m = lambda f: True
                    self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
|
|
|
|
|
|
class exact(match):
    '''Matcher built with exact=True: the given files are treated as
    literal file names rather than patterns.'''

    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
|
|
|
|
|
|
class always(match):
    '''Matcher built without any patterns and flagged as always matching.'''

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
        self._always = True
|
|
|
|
|
|
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # state copied verbatim from the wrapped matcher
        self._matcher = matcher
        self._path = path
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # keep only the files located below path, with "path/" stripped
        prefix = path + "/"
        narrowed = []
        for name in matcher._files:
            if name.startswith(prefix):
                narrowed.append(name[len(prefix):])
        self._files = narrowed
        self._fmap = set(narrowed)

        def matchfn(fn):
            # re-insert the directory before asking the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = matchfn

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher, with the narrowed
        directory put back in front of f.'''
        self._matcher.bad(self._path + "/" + f, msg)
|
|
|
|
|
|
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when that kind is a known
    one; return default otherwise.'''
    kind, pat = _patsplit(pattern, default)
    return kind
|
|
|
|
|
|
def _patsplit(pattern, default):
|
|
|
"""Split a string into the optional pattern kind prefix and the actual
|
|
|
pattern."""
|
|
|
if ':' in pattern:
|
|
|
kind, pat = pattern.split(':', 1)
|
|
|
if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
|
|
|
'listfile', 'listfile0', 'set'):
|
|
|
return kind, pat
|
|
|
return default, pattern
|
|
|
|
|
|
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    quote = util.re.escape
    pieces = []
    depth = 0 # number of '{' groups currently open
    pos, end = 0, len(pat)
    while pos < end:
        ch = pat[pos]
        pos += 1
        # one character of lookahead; '' once the pattern is exhausted
        nxt = pat[pos:pos + 1]
        if ch == '*':
            if nxt == '*':
                pos += 1
                if pat[pos:pos + 1] == '/':
                    # '**/' also matches zero directories
                    pos += 1
                    pieces.append('(?:.*/)?')
                else:
                    pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' is skipped
            # so that it cannot terminate the class
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: the '[' is a literal
                pieces.append('\\[')
            else:
                stuff = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                pieces.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\' and nxt:
            # a backslash quotes the character that follows it
            pos += 1
            pieces.append(quote(nxt))
        else:
            # ordinary characters, '}' or ',' outside of a group, and a
            # trailing backslash are all taken literally
            pieces.append(quote(ch))
    return ''.join(pieces)
|
|
|
|
|
|
def _regex(kind, pat, globsuffix):
|
|
|
'''Convert a (normalized) pattern of any kind into a regular expression.
|
|
|
globsuffix is appended to the regexp of globs.'''
|
|
|
if not pat:
|
|
|
return ''
|
|
|
if kind == 're':
|
|
|
return pat
|
|
|
if kind == 'path':
|
|
|
return '^' + util.re.escape(pat) + '(?:/|$)'
|
|
|
if kind == 'relglob':
|
|
|
return '(?:|.*/)' + _globre(pat) + globsuffix
|
|
|
if kind == 'relpath':
|
|
|
return util.re.escape(pat) + '(?:/|$)'
|
|
|
if kind == 'relre':
|
|
|
if pat.startswith('^'):
|
|
|
return pat
|
|
|
return '.*' + pat
|
|
|
return _globre(pat) + globsuffix
|
|
|
|
|
|
def _buildmatch(ctx, kindpats, globsuffix):
    '''Build a matcher for kindpats; return (regexp string, match function).

    'set' patterns are expanded with ctx into a file set; the remaining
    patterns are compiled into one regexp.  globsuffix is appended to the
    regexp of globs.'''
    fileset, rest = _expandsets(kindpats, ctx)
    if not rest:
        # only filesets (or nothing at all): membership is the whole test
        return "", fileset.__contains__

    regex, rematch = _buildregexmatch(rest, globsuffix)
    if not fileset:
        return regex, rematch

    def matchfn(f):
        return f in fileset or rematch(f)
    return regex, matchfn
|
|
|
|
|
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of (kind, pattern) pairs.

    Returns the regexp string and a matcher function.  Raises util.Abort
    when a pattern does not compile."""
    try:
        alternatives = []
        for (k, p) in kindpats:
            alternatives.append(_regex(k, p, globsuffix))
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]

        def either(s):
            return first(s) or second(s)
        # the regexp string returned is still the complete, unsplit one
        return regex, either
    except re.error:
        # compile the patterns one at a time to name the offending one
        for kind, pat in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise util.Abort(_("invalid pattern"))
|
|
|
|
|
|
def _normalize(patterns, default, root, cwd, auditor):
    '''Turn each 'kind:pat' of the patterns list into a (kind, pat) tuple
    with the pattern normalized and rooted, and with listfiles expanded.

    Raises util.Abort when a listfile cannot be read.'''
    result = []
    split = [_patsplit(p, default) for p in patterns]
    for kind, pat in split:
        if kind in ('listfile', 'listfile0'):
            # the pattern names a file holding more patterns: read it and
            # normalize its non-empty entries recursively
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        result.append((kind, pat))
    return result
|
|
|
|
|
|
def _roots(kindpats):
|
|
|
'''return roots and exact explicitly listed files from patterns
|
|
|
|
|
|
>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
|
|
|
['g', 'g', '.']
|
|
|
>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
|
|
|
['r', 'p/p', '.']
|
|
|
>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
|
|
|
['.', '.', '.']
|
|
|
'''
|
|
|
r = []
|
|
|
for kind, pat in kindpats:
|
|
|
if kind == 'glob': # find the non-glob prefix
|
|
|
root = []
|
|
|
for p in pat.split('/'):
|
|
|
if '[' in p or '{' in p or '*' in p or '?' in p:
|
|
|
break
|
|
|
root.append(p)
|
|
|
r.append('/'.join(root) or '.')
|
|
|
elif kind in ('relpath', 'path'):
|
|
|
r.append(pat or '.')
|
|
|
else: # relglob, re, relre
|
|
|
r.append('.')
|
|
|
return r
|
|
|
|
|
|
def _anypats(kindpats):
|
|
|
for kind, pat in kindpats:
|
|
|
if kind in ('glob', 're', 'relglob', 'relre', 'set'):
|
|
|
return True
|
|
|
|