match.py
352 lines
| 10.6 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Alejandro Santos
|
r9036 | import re | ||
Matt Mackall
|
r14675 | import scmutil, util, fileset | ||
Martin Geisler
|
r12133 | from i18n import _ | ||
Matt Mackall
|
r6576 | |||
Bryan O'Sullivan
|
def _rematcher(pat):
    '''compile pat with util.compilere and return its match function

    Compiled patterns coming from facebook's re2 bindings offer a
    slightly faster test_match; when that attribute is missing the
    ordinary match method is returned instead.'''
    compiled = util.compilere(pat)
    if hasattr(compiled, 'test_match'):
        return compiled.test_match
    return compiled.match
Matt Mackall
|
def _expandsets(pats, ctx):
    '''separate 'set' patterns from all other (kind, expr) pairs

    Every 'set' expression is evaluated with fileset.getfileset(ctx, expr)
    and the results are merged into a single set of files. Returns that
    set together with the list of remaining pairs, in their original
    order. Aborts if a 'set' pattern is found while ctx is false.'''
    matched = set()
    remaining = []
    for pattern in pats:
        kind, expr = pattern
        if kind != 'set':
            remaining.append((kind, expr))
            continue
        if not ctx:
            raise util.Abort("fileset expression with no context")
        matched.update(fileset.getfileset(ctx, expr))
    return matched, remaining
Matt Mackall
|
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None):
        """create a matcher for a set of file patterns

        arguments:
        root - the canonical root of the tree being matched against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (kind defaults to 'glob')
        exclude - patterns to exclude (kind defaults to 'glob')
        default - kind assumed for a pattern without an explicit type
        exact - patterns are actually literals
        auditor - handed to the pattern normalization step
        ctx - context used to evaluate 'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """
        self._root = root
        self._cwd = cwd
        self._ctx = ctx
        self._files = []
        self._anypats = bool(include or exclude)

        # include/exclude match a whole path or any directory prefix of it
        if include:
            incpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, incfn = _buildmatch(ctx, incpats, '(?:/|$)')
        if exclude:
            excpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, excfn = _buildmatch(ctx, excpats, '(?:/|$)')

        if exact:
            # literal names: reuse the caller's list when we are given one
            files = patterns
            if not isinstance(files, list):
                files = list(files)
            self._files = files
            patfn = self.exact
        elif patterns:
            normpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(normpats)
            self._anypats = self._anypats or _anypats(normpats)
            self.patternspat, patfn = _buildmatch(ctx, normpats, '$')

        # pick the cheapest callable combining whichever parts are in use
        haspats = patterns or exact
        if haspats and include and exclude:
            matchfn = lambda f: incfn(f) and not excfn(f) and patfn(f)
        elif haspats and include:
            matchfn = lambda f: incfn(f) and patfn(f)
        elif haspats and exclude:
            matchfn = lambda f: not excfn(f) and patfn(f)
        elif haspats:
            matchfn = patfn
        elif include and exclude:
            matchfn = lambda f: incfn(f) and not excfn(f)
        elif include:
            matchfn = incfn
        elif exclude:
            matchfn = lambda f: not excfn(f)
        else:
            matchfn = lambda f: True

        self.matchfn = matchfn
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''apply the match function to fn'''
        return self.matchfn(fn)

    def __iter__(self):
        '''iterate over the entries of files()'''
        for name in self._files:
            yield name

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    def dir(self, f):
        '''hook taking a single name; does nothing here'''
        pass

    def missing(self, f):
        '''hook taking a single name; does nothing here'''
        pass

    def exact(self, f):
        '''return True if f is one of the entries of files()'''
        return f in self._fmap

    def rel(self, f):
        '''return util.pathto(root, cwd, f)'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''return the list of explicit files or pattern roots'''
        return self._files

    def anypats(self):
        '''return the flag recording whether real patterns were given'''
        return self._anypats

    def always(self):
        '''return False; subclasses may override'''
        return False
Matt Mackall
|
r8568 | |||
Matt Mackall
|
class exact(match):
    '''matcher whose files are handed to match with exact=True'''
    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, files, exact=True)
Matt Mackall
|
r8585 | |||
class always(match):
    '''matcher built from an empty pattern list'''
    def __init__(self, root, cwd):
        nopatterns = []
        match.__init__(self, root, cwd, nopatterns)

    def always(self):
        '''return True, overriding the base class answer'''
        return True
Matt Mackall
|
r8585 | |||
Martin Geisler
|
class narrowmatcher(match):
    """Restrict an existing matcher to a single subdirectory.

    Names given to this matcher are relative to the subdirectory; the
    subdirectory prefix is added before consulting the wrapped matcher
    and stripped from the files it lists:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        self._path = path
        self._matcher = matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._anypats = matcher._anypats

        # keep only the wrapped matcher's files below path, prefix removed
        prefix = path + "/"
        skip = len(prefix)
        narrowed = []
        for name in matcher._files:
            if name.startswith(prefix):
                narrowed.append(name[skip:])
        self._files = narrowed
        self._fmap = set(narrowed)

        def widened(fn):
            # re-add the subdirectory before asking the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = widened

    def bad(self, f, msg):
        '''forward to the wrapped matcher's bad() with the full path'''
        fullname = self._path + "/" + f
        self._matcher.bad(fullname, msg)
Matt Mackall
|
def patkind(pat):
    '''return the kind that _patsplit finds in pat, using None as default'''
    kind, _rest = _patsplit(pat, None)
    return kind
def _patsplit(pat, default):
    """Separate a pattern string into its kind and the pattern proper.

    Returns (kind, rest) when pat begins with one of the recognized
    'kind:' prefixes; otherwise returns (default, pat) with pat left
    untouched."""
    colon = pat.find(':')
    if colon >= 0:
        kind = pat[:colon]
        if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                    'listfile', 'listfile0', 'set'):
            return kind, pat[colon + 1:]
    return default, pat
Matt Mackall
|
def _globre(pat):
    "translate a glob pattern into the source of an equivalent regexp"
    escape = re.escape
    pieces = []
    depth = 0                   # number of '{' groups currently open
    pos, end = 0, len(pat)
    while pos < end:
        c = pat[pos]
        pos += 1
        if c == '*':
            # '**' may cross directory separators, a single '*' may not
            if pos < end and pat[pos] == '*':
                pos += 1
                pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif c == '?':
            pieces.append('.')
        elif c == '[':
            # a leading '!' or ']' is part of the class, so step over it
            # before looking for the closing bracket
            scan = pos
            if scan < end and pat[scan] in '!]':
                scan += 1
            close = pat.find(']', scan)
            if close < 0:
                # no closing bracket: treat '[' as a literal character
                pieces.append('\\[')
            else:
                cls = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if cls[0] == '!':
                    # glob negation becomes regexp negation
                    cls = '^' + cls[1:]
                elif cls[0] == '^':
                    # a literal '^' must not be read as negation
                    cls = '\\' + cls
                pieces.append('[%s]' % cls)
        elif c == '{':
            depth += 1
            pieces.append('(?:')
        elif c == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif c == ',' and depth:
            pieces.append('|')
        elif c == '\\':
            # a backslash quotes the next character; a trailing one is
            # taken literally
            if pos < end:
                pieces.append(escape(pat[pos]))
                pos += 1
            else:
                pieces.append(escape(c))
        else:
            # ordinary characters, and '}' or ',' outside any group
            pieces.append(escape(c))
    return ''.join(pieces)
Matt Mackall
|
r8570 | |||
Matt Mackall
|
def _regex(kind, name, tail):
    '''return the regular expression source for one (kind, name) pattern

    tail is appended to glob-style patterns only. An empty name always
    yields an empty string.'''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'relre':
        # an explicitly anchored regexp is used as is
        if name.startswith('^'):
            return name
        return '.*' + name
    if kind in ('path', 'relpath'):
        # literal name matching itself or anything below it
        literal = re.escape(name) + '(?:/|$)'
        if kind == 'path':
            return '^' + literal
        return literal
    globbed = _globre(name) + tail
    if kind == 'relglob':
        # unrooted: allow any leading directories
        return '(?:|.*/)' + globbed
    return globbed
Matt Mackall
|
r8574 | |||
Matt Mackall
|
def _buildmatch(ctx, pats, tail):
    '''return (regexp source, match function) for a list of patterns

    'set' patterns are expanded to a set of files via _expandsets; the
    remaining patterns are compiled by _buildregexmatch. A name matches
    when it is in the expanded set or accepted by the compiled regexp.
    With no regexp patterns at all the source is the empty string.'''
    fset, regexpats = _expandsets(pats, ctx)
    if regexpats:
        pat, regexfn = _buildregexmatch(regexpats, tail)
        if not fset:
            return pat, regexfn
        return pat, lambda f: f in fset or regexfn(f)
    return "", fset.__contains__
def _buildregexmatch(pats, tail):
    """compile a list of (kind, pattern) pairs into one match function

    Returns (regexp source, match function). Aborts when one of the
    patterns is not a valid regular expression."""
    try:
        alternatives = []
        for kind, name in pats:
            alternatives.append(_regex(kind, name, tail))
        pat = '(?:%s)' % '|'.join(alternatives)
        if len(pat) > 20000:
            raise OverflowError
        return pat, _rematcher(pat)
    except OverflowError:
        # The combined expression was too big for the regex engine (or
        # exceeded our own size limit), so split the pattern list in
        # half and combine the two resulting matchers, recursing until
        # every piece compiles
        count = len(pats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(pats[:half], tail)[1]
        second = _buildregexmatch(pats[half:], tail)[1]
        return pat, lambda s: first(s) or second(s)
    except re.error:
        # recompile the patterns one at a time to name the offending one
        for kind, name in pats:
            try:
                _rematcher('(?:%s)' % _regex(kind, name, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, name))
        raise util.Abort(_("invalid pattern"))
Matt Mackall
|
r8574 | |||
Martin Geisler
|
def _normalize(names, default, root, cwd, auditor):
    '''turn pattern strings into a list of normalized (kind, name) pairs

    Patterns without an explicit kind get the default kind. 'glob' and
    'relpath' names go through scmutil.canonpath, 'relglob' and 'path'
    names through util.normpath. 'listfile' and 'listfile0' patterns are
    replaced by the normalized patterns read from the named file (one
    per line, or NUL-separated for 'listfile0'); empty entries are
    dropped. Aborts when such a file cannot be read.'''
    result = []
    split = [_patsplit(p, default) for p in names]
    for kind, name in split:
        if kind in ('listfile', 'listfile0'):
            try:
                data = util.readfile(name)
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            if kind == 'listfile0':
                entries = data.split('\0')
            else:
                entries = data.splitlines()
            entries = [e for e in entries if e]
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue

        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        result.append((kind, name))
    return result
Matt Mackall
|
r8574 | |||
Matt Mackall
|
def _roots(patterns):
    '''return the root directory or file of each pattern

    'glob' patterns contribute their leading path components up to the
    first one containing a glob character, 'relpath' and 'path' patterns
    contribute their name, and 'relglob' patterns contribute '.'. An
    empty result for a pattern is reported as '.'. Other kinds add
    nothing.'''
    roots = []
    for kind, name in patterns:
        if kind == 'relglob':
            roots.append('.')
        elif kind in ('relpath', 'path'):
            roots.append(name or '.')
        elif kind == 'glob':
            # count the leading components free of glob characters
            parts = name.split('/')
            keep = 0
            while keep < len(parts):
                part = parts[keep]
                if '*' in part or '?' in part or '[' in part or '{' in part:
                    break
                keep += 1
            roots.append('/'.join(parts[:keep]) or '.')
    return roots
def _anypats(patterns):
    '''return True if some pattern is a glob, regexp or fileset kind

    Falls off the end (returning None) when no such pattern is found.'''
    patternkinds = ('glob', 're', 'relglob', 'relre', 'set')
    for kind, name in patterns:
        if kind not in patternkinds:
            continue
        return True