match.py
413 lines
| 13.4 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Alejandro Santos
|
r9036 | import re | ||
Augie Fackler
|
r20401 | import util, pathutil | ||
Martin Geisler
|
r12133 | from i18n import _ | ||
Matt Mackall
|
r6576 | |||
Mads Kiilerich
|
r21111 | def _rematcher(regex): | ||
'''compile the regexp with the best available regexp engine and return a | ||||
matcher function''' | ||||
Siddharth Agarwal
|
r21909 | m = util.re.compile(regex) | ||
Bryan O'Sullivan
|
r16943 | try: | ||
# slightly faster, provided by facebook's re2 bindings | ||||
return m.test_match | ||||
except AttributeError: | ||||
return m.match | ||||
Mads Kiilerich
|
r21111 | def _expandsets(kindpats, ctx): | ||
'''Returns the kindpats list with the 'set' patterns expanded.''' | ||||
Matt Mackall
|
r14675 | fset = set() | ||
other = [] | ||||
Mads Kiilerich
|
r21111 | for kind, pat in kindpats: | ||
Matt Mackall
|
r14675 | if kind == 'set': | ||
if not ctx: | ||||
raise util.Abort("fileset expression with no context") | ||||
Mads Kiilerich
|
r21111 | s = ctx.getfileset(pat) | ||
Matt Mackall
|
r14675 | fset.update(s) | ||
continue | ||||
Mads Kiilerich
|
r21111 | other.append((kind, pat)) | ||
Matt Mackall
|
r14675 | return fset, other | ||
Matt Mackall
|
r8587 | class match(object): | ||
Matt Mackall
|
r8567 | def __init__(self, root, cwd, patterns, include=[], exclude=[], | ||
Matt Mackall
|
r14674 | default='glob', exact=False, auditor=None, ctx=None): | ||
Matt Mackall
|
r8581 | """build an object to match a set of file patterns | ||
arguments: | ||||
root - the canonical root of the tree you're matching against | ||||
cwd - the current working directory, if relevant | ||||
patterns - patterns to find | ||||
Mads Kiilerich
|
r21111 | include - patterns to include (unless they are excluded) | ||
exclude - patterns to exclude (even if they are included) | ||||
default - if a pattern in patterns has no explicit type, assume this one | ||||
exact - patterns are actually filenames (include/exclude still apply) | ||||
Matt Mackall
|
r8581 | |||
a pattern is one of: | ||||
'glob:<glob>' - a glob relative to cwd | ||||
're:<regexp>' - a regular expression | ||||
Mads Kiilerich
|
r17425 | 'path:<path>' - a path relative to repository root | ||
Matt Mackall
|
r8581 | 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs) | ||
'relpath:<path>' - a path relative to cwd | ||||
Matt Mackall
|
r8587 | 'relre:<regexp>' - a regexp that needn't match the start of a name | ||
Matt Mackall
|
r14675 | 'set:<fileset>' - a fileset expression | ||
Matt Mackall
|
r8587 | '<something>' - a pattern of the specified default type | ||
Matt Mackall
|
r8581 | """ | ||
Matt Mackall
|
r8587 | self._root = root | ||
self._cwd = cwd | ||||
Mads Kiilerich
|
r21079 | self._files = [] # exact files and roots of patterns | ||
Matt Mackall
|
r8587 | self._anypats = bool(include or exclude) | ||
Matt Mackall
|
r14674 | self._ctx = ctx | ||
Bryan O'Sullivan
|
r18713 | self._always = False | ||
Matt Harbison
|
r23480 | self._pathrestricted = bool(include or exclude or patterns) | ||
Matt Mackall
|
r8581 | |||
Martin von Zweigbergk
|
r22513 | matchfns = [] | ||
Matt Mackall
|
r8586 | if include: | ||
Mads Kiilerich
|
r21111 | kindpats = _normalize(include, 'glob', root, cwd, auditor) | ||
self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)') | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(im) | ||
Matt Mackall
|
r8586 | if exclude: | ||
Mads Kiilerich
|
r21111 | kindpats = _normalize(exclude, 'glob', root, cwd, auditor) | ||
self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)') | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(lambda f: not em(f)) | ||
Matt Mackall
|
r8586 | if exact: | ||
FUJIWARA Katsunori
|
r16789 | if isinstance(patterns, list): | ||
self._files = patterns | ||||
else: | ||||
self._files = list(patterns) | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(self.exact) | ||
Matt Mackall
|
r8586 | elif patterns: | ||
Mads Kiilerich
|
r21111 | kindpats = _normalize(patterns, default, root, cwd, auditor) | ||
self._files = _roots(kindpats) | ||||
self._anypats = self._anypats or _anypats(kindpats) | ||||
self.patternspat, pm = _buildmatch(ctx, kindpats, '$') | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(pm) | ||
Matt Mackall
|
r8581 | |||
Martin von Zweigbergk
|
r22513 | if not matchfns: | ||
m = util.always | ||||
self._always = True | ||||
elif len(matchfns) == 1: | ||||
m = matchfns[0] | ||||
Matt Mackall
|
r8581 | else: | ||
Martin von Zweigbergk
|
r22513 | def m(f): | ||
for matchfn in matchfns: | ||||
if not matchfn(f): | ||||
return False | ||||
return True | ||||
Matt Mackall
|
r8581 | |||
Matt Mackall
|
r8587 | self.matchfn = m | ||
self._fmap = set(self._files) | ||||
def __call__(self, fn): | ||||
return self.matchfn(fn) | ||||
def __iter__(self): | ||||
for f in self._files: | ||||
yield f | ||||
Mads Kiilerich
|
r21111 | |||
# Callbacks related to how the matcher is used by dirstate.walk. | ||||
# Subscribers to these events must monkeypatch the matcher object. | ||||
Matt Mackall
|
r8587 | def bad(self, f, msg): | ||
Mads Kiilerich
|
r21111 | '''Callback from dirstate.walk for each explicit file that can't be | ||
found/accessed, with an error message.''' | ||||
Matt Mackall
|
r8680 | pass | ||
Mads Kiilerich
|
r21111 | |||
# If an explicitdir is set, it will be called when an explicitly listed | ||||
# directory is visited. | ||||
Siddharth Agarwal
|
r19143 | explicitdir = None | ||
Mads Kiilerich
|
r21111 | |||
# If an traversedir is set, it will be called when a directory discovered | ||||
# by recursive traversal is visited. | ||||
Siddharth Agarwal
|
r19143 | traversedir = None | ||
Mads Kiilerich
|
r21111 | |||
Matt Mackall
|
r8587 | def rel(self, f): | ||
Mads Kiilerich
|
r21111 | '''Convert repo path back to path that is relative to cwd of matcher.''' | ||
Matt Mackall
|
r8587 | return util.pathto(self._root, self._cwd, f) | ||
Mads Kiilerich
|
r21111 | |||
Matt Harbison
|
r23480 | def uipath(self, f): | ||
'''Convert repo path to a display path. If patterns or -I/-X were used | ||||
to create this matcher, the display path will be relative to cwd. | ||||
Otherwise it is relative to the root of the repo.''' | ||||
return (self._pathrestricted and self.rel(f)) or f | ||||
Matt Mackall
|
r8587 | def files(self): | ||
Mads Kiilerich
|
r21111 | '''Explicitly listed files or patterns or roots: | ||
if no patterns or .always(): empty list, | ||||
if exact: list exact files, | ||||
if not .anypats(): list all files and dirs, | ||||
else: optimal roots''' | ||||
Matt Mackall
|
r8587 | return self._files | ||
Mads Kiilerich
|
r21111 | |||
def exact(self, f): | ||||
'''Returns True if f is in .files().''' | ||||
return f in self._fmap | ||||
Matt Mackall
|
r8587 | def anypats(self): | ||
Mads Kiilerich
|
r21111 | '''Matcher uses patterns or include/exclude.''' | ||
Matt Mackall
|
r8587 | return self._anypats | ||
Mads Kiilerich
|
r21111 | |||
Jesse Glick
|
r16645 | def always(self): | ||
Mads Kiilerich
|
r21111 | '''Matcher will match everything and .files() will be empty | ||
- optimization might be possible and necessary.''' | ||||
Bryan O'Sullivan
|
r18713 | return self._always | ||
Matt Mackall
|
r8568 | |||
Martin von Zweigbergk
|
r23549 | def exact(root, cwd, files): | ||
return match(root, cwd, files, exact=True) | ||||
Matt Mackall
|
r8585 | |||
Martin von Zweigbergk
|
r23549 | def always(root, cwd): | ||
return match(root, cwd, []) | ||||
Matt Mackall
|
r8585 | |||
Martin Geisler
|
r12165 | class narrowmatcher(match): | ||
"""Adapt a matcher to work on a subdirectory only. | ||||
The paths are remapped to remove/insert the path as needed: | ||||
>>> m1 = match('root', '', ['a.txt', 'sub/b.txt']) | ||||
>>> m2 = narrowmatcher('sub', m1) | ||||
>>> bool(m2('a.txt')) | ||||
False | ||||
>>> bool(m2('b.txt')) | ||||
True | ||||
>>> bool(m2.matchfn('a.txt')) | ||||
False | ||||
>>> bool(m2.matchfn('b.txt')) | ||||
True | ||||
>>> m2.files() | ||||
['b.txt'] | ||||
>>> m2.exact('b.txt') | ||||
True | ||||
Martin Geisler
|
r12267 | >>> m2.rel('b.txt') | ||
'b.txt' | ||||
Martin Geisler
|
r12268 | >>> def bad(f, msg): | ||
... print "%s: %s" % (f, msg) | ||||
>>> m1.bad = bad | ||||
>>> m2.bad('x.txt', 'No such file') | ||||
sub/x.txt: No such file | ||||
Martin Geisler
|
r12165 | """ | ||
def __init__(self, path, matcher): | ||||
Martin Geisler
|
r12267 | self._root = matcher._root | ||
self._cwd = matcher._cwd | ||||
Martin Geisler
|
r12165 | self._path = path | ||
self._matcher = matcher | ||||
Bryan O'Sullivan
|
r18713 | self._always = matcher._always | ||
Matt Harbison
|
r23480 | self._pathrestricted = matcher._pathrestricted | ||
Martin Geisler
|
r12165 | |||
self._files = [f[len(path) + 1:] for f in matcher._files | ||||
if f.startswith(path + "/")] | ||||
self._anypats = matcher._anypats | ||||
self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn) | ||||
self._fmap = set(self._files) | ||||
Martin Geisler
|
r12268 | def bad(self, f, msg): | ||
self._matcher.bad(self._path + "/" + f, msg) | ||||
Mads Kiilerich
|
r21111 | def patkind(pattern, default=None): | ||
'''If pattern is 'kind:pat' with a known kind, return kind.''' | ||||
return _patsplit(pattern, default)[0] | ||||
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _patsplit(pattern, default): | ||
"""Split a string into the optional pattern kind prefix and the actual | ||||
pattern.""" | ||||
if ':' in pattern: | ||||
kind, pat = pattern.split(':', 1) | ||||
Steve Borho
|
r13218 | if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', | ||
Matt Mackall
|
r14675 | 'listfile', 'listfile0', 'set'): | ||
Mads Kiilerich
|
r21111 | return kind, pat | ||
return default, pattern | ||||
Matt Mackall
|
r8570 | |||
Matt Mackall
|
r8582 | def _globre(pat): | ||
Mads Kiilerich
|
r21112 | r'''Convert an extended glob string to a regexp string. | ||
>>> print _globre(r'?') | ||||
. | ||||
>>> print _globre(r'*') | ||||
[^/]* | ||||
>>> print _globre(r'**') | ||||
.* | ||||
Siddharth Agarwal
|
r21815 | >>> print _globre(r'**/a') | ||
(?:.*/)?a | ||||
>>> print _globre(r'a/**/b') | ||||
a\/(?:.*/)?b | ||||
Mads Kiilerich
|
r21112 | >>> print _globre(r'[a*?!^][^b][!c]') | ||
[a*?!^][\^b][^c] | ||||
>>> print _globre(r'{a,b}') | ||||
(?:a|b) | ||||
>>> print _globre(r'.\*\?') | ||||
\.\*\? | ||||
''' | ||||
Matt Mackall
|
r8570 | i, n = 0, len(pat) | ||
res = '' | ||||
group = 0 | ||||
Siddharth Agarwal
|
r21915 | escape = util.re.escape | ||
Matt Mackall
|
r10282 | def peek(): | ||
return i < n and pat[i] | ||||
Matt Mackall
|
r8570 | while i < n: | ||
c = pat[i] | ||||
Matt Mackall
|
r10282 | i += 1 | ||
Matt Mackall
|
r8583 | if c not in '*?[{},\\': | ||
res += escape(c) | ||||
elif c == '*': | ||||
Matt Mackall
|
r8570 | if peek() == '*': | ||
i += 1 | ||||
Siddharth Agarwal
|
r21815 | if peek() == '/': | ||
i += 1 | ||||
res += '(?:.*/)?' | ||||
else: | ||||
res += '.*' | ||||
Matt Mackall
|
r8570 | else: | ||
res += '[^/]*' | ||||
elif c == '?': | ||||
res += '.' | ||||
elif c == '[': | ||||
j = i | ||||
if j < n and pat[j] in '!]': | ||||
j += 1 | ||||
while j < n and pat[j] != ']': | ||||
j += 1 | ||||
if j >= n: | ||||
res += '\\[' | ||||
else: | ||||
stuff = pat[i:j].replace('\\','\\\\') | ||||
i = j + 1 | ||||
if stuff[0] == '!': | ||||
stuff = '^' + stuff[1:] | ||||
elif stuff[0] == '^': | ||||
stuff = '\\' + stuff | ||||
res = '%s[%s]' % (res, stuff) | ||||
elif c == '{': | ||||
group += 1 | ||||
res += '(?:' | ||||
elif c == '}' and group: | ||||
res += ')' | ||||
group -= 1 | ||||
elif c == ',' and group: | ||||
res += '|' | ||||
elif c == '\\': | ||||
p = peek() | ||||
if p: | ||||
i += 1 | ||||
Matt Mackall
|
r8583 | res += escape(p) | ||
Matt Mackall
|
r8570 | else: | ||
Matt Mackall
|
r8583 | res += escape(c) | ||
Matt Mackall
|
r8570 | else: | ||
Matt Mackall
|
r8583 | res += escape(c) | ||
Matt Mackall
|
r8582 | return res | ||
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _regex(kind, pat, globsuffix): | ||
'''Convert a (normalized) pattern of any kind into a regular expression. | ||||
globsuffix is appended to the regexp of globs.''' | ||||
if not pat: | ||||
Matt Mackall
|
r8574 | return '' | ||
if kind == 're': | ||||
Mads Kiilerich
|
r21111 | return pat | ||
if kind == 'path': | ||||
Siddharth Agarwal
|
r21915 | return '^' + util.re.escape(pat) + '(?:/|$)' | ||
Mads Kiilerich
|
r21111 | if kind == 'relglob': | ||
return '(?:|.*/)' + _globre(pat) + globsuffix | ||||
if kind == 'relpath': | ||||
Siddharth Agarwal
|
r21915 | return util.re.escape(pat) + '(?:/|$)' | ||
Mads Kiilerich
|
r21111 | if kind == 'relre': | ||
if pat.startswith('^'): | ||||
return pat | ||||
return '.*' + pat | ||||
return _globre(pat) + globsuffix | ||||
Matt Mackall
|
r8574 | |||
Mads Kiilerich
|
r21111 | def _buildmatch(ctx, kindpats, globsuffix): | ||
'''Return regexp string and a matcher function for kindpats. | ||||
globsuffix is appended to the regexp of globs.''' | ||||
fset, kindpats = _expandsets(kindpats, ctx) | ||||
if not kindpats: | ||||
Matt Mackall
|
r14675 | return "", fset.__contains__ | ||
Mads Kiilerich
|
r21111 | regex, mf = _buildregexmatch(kindpats, globsuffix) | ||
Matt Mackall
|
r14675 | if fset: | ||
Mads Kiilerich
|
r21111 | return regex, lambda f: f in fset or mf(f) | ||
return regex, mf | ||||
Matt Mackall
|
r14675 | |||
Mads Kiilerich
|
r21111 | def _buildregexmatch(kindpats, globsuffix): | ||
"""Build a match function from a list of kinds and kindpats, | ||||
return regexp string and a matcher function.""" | ||||
Matt Mackall
|
r8574 | try: | ||
Mads Kiilerich
|
r21111 | regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix) | ||
for (k, p) in kindpats]) | ||||
if len(regex) > 20000: | ||||
Brodie Rao
|
r16687 | raise OverflowError | ||
Mads Kiilerich
|
r21111 | return regex, _rematcher(regex) | ||
Matt Mackall
|
r8574 | except OverflowError: | ||
# We're using a Python with a tiny regex engine and we | ||||
# made it explode, so we'll divide the pattern list in two | ||||
# until it works | ||||
Mads Kiilerich
|
r21111 | l = len(kindpats) | ||
Matt Mackall
|
r8574 | if l < 2: | ||
raise | ||||
Mads Kiilerich
|
r21111 | regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix) | ||
regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix) | ||||
Yuya Nishihara
|
r21191 | return regex, lambda s: a(s) or b(s) | ||
Matt Mackall
|
r8574 | except re.error: | ||
Mads Kiilerich
|
r21111 | for k, p in kindpats: | ||
Matt Mackall
|
r8574 | try: | ||
Mads Kiilerich
|
r21111 | _rematcher('(?:%s)' % _regex(k, p, globsuffix)) | ||
Matt Mackall
|
r8574 | except re.error: | ||
Martin Geisler
|
r12133 | raise util.Abort(_("invalid pattern (%s): %s") % (k, p)) | ||
raise util.Abort(_("invalid pattern")) | ||||
Matt Mackall
|
r8574 | |||
Mads Kiilerich
|
r21111 | def _normalize(patterns, default, root, cwd, auditor): | ||
'''Convert 'kind:pat' from the patterns list to tuples with kind and | ||||
normalized and rooted patterns and with listfiles expanded.''' | ||||
kindpats = [] | ||||
for kind, pat in [_patsplit(p, default) for p in patterns]: | ||||
Matt Mackall
|
r8574 | if kind in ('glob', 'relpath'): | ||
Mads Kiilerich
|
r21111 | pat = pathutil.canonpath(root, cwd, pat, auditor) | ||
Matt Mackall
|
r8574 | elif kind in ('relglob', 'path'): | ||
Mads Kiilerich
|
r21111 | pat = util.normpath(pat) | ||
Steve Borho
|
r13218 | elif kind in ('listfile', 'listfile0'): | ||
try: | ||||
Mads Kiilerich
|
r21111 | files = util.readfile(pat) | ||
Patrick Mezard
|
r14248 | if kind == 'listfile0': | ||
files = files.split('\0') | ||||
else: | ||||
files = files.splitlines() | ||||
Steve Borho
|
r13218 | files = [f for f in files if f] | ||
except EnvironmentError: | ||||
Mads Kiilerich
|
r21111 | raise util.Abort(_("unable to read file list (%s)") % pat) | ||
kindpats += _normalize(files, default, root, cwd, auditor) | ||||
Steve Borho
|
r13218 | continue | ||
Mads Kiilerich
|
r21111 | # else: re or relre - which cannot be normalized | ||
kindpats.append((kind, pat)) | ||||
return kindpats | ||||
Matt Mackall
|
r8574 | |||
Mads Kiilerich
|
r21111 | def _roots(kindpats): | ||
Mads Kiilerich
|
r21079 | '''return roots and exact explicitly listed files from patterns | ||
>>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')]) | ||||
['g', 'g', '.'] | ||||
>>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')]) | ||||
['r', 'p/p', '.'] | ||||
>>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')]) | ||||
['.', '.', '.'] | ||||
''' | ||||
Matt Mackall
|
r8576 | r = [] | ||
Mads Kiilerich
|
r21111 | for kind, pat in kindpats: | ||
Matt Mackall
|
r8584 | if kind == 'glob': # find the non-glob prefix | ||
root = [] | ||||
Mads Kiilerich
|
r21111 | for p in pat.split('/'): | ||
Matt Mackall
|
r8584 | if '[' in p or '{' in p or '*' in p or '?' in p: | ||
break | ||||
root.append(p) | ||||
r.append('/'.join(root) or '.') | ||||
Matt Mackall
|
r8574 | elif kind in ('relpath', 'path'): | ||
Mads Kiilerich
|
r21111 | r.append(pat or '.') | ||
Mads Kiilerich
|
r19107 | else: # relglob, re, relre | ||
Matt Mackall
|
r8576 | r.append('.') | ||
return r | ||||
Mads Kiilerich
|
r21111 | def _anypats(kindpats): | ||
for kind, pat in kindpats: | ||||
Patrick Mezard
|
r16182 | if kind in ('glob', 're', 'relglob', 'relre', 'set'): | ||
Matt Mackall
|
r8576 | return True | ||