match.py
248 lines
| 7.5 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
Martin Geisler
|
# match.py - file name matching
#
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
Matt Mackall
|
r8570 | import util, re | ||
Matt Mackall
|
r6576 | |||
Matt Mackall
|
class match(object):
    """file name matcher built from patterns plus include/exclude filters

    An instance is callable: m(fn) returns the result of the matching
    function assembled in __init__.  Iterating over an instance yields
    the same entries that files() returns.
    """

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (None or empty: no include filter)
        exclude - patterns to exclude (None or empty: no exclude filter)
        default - if a pattern in names has no explicit type, assume this one
        exact - patterns are actually literals

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to canonroot
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        '<something>' - a pattern of the specified default type
        """
        # include/exclude default to None rather than a shared list object;
        # they are only ever truth-tested and handed to _normalize, so an
        # empty list passed by an existing caller behaves exactly the same.
        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)

        # im / em / pm are only bound when the corresponding input is
        # non-empty; the selection below never touches an unbound one.
        if include:
            im = _buildmatch(_normalize(include, 'glob', root, cwd), '(?:/|$)')
        if exclude:
            em = _buildmatch(_normalize(exclude, 'glob', root, cwd), '(?:/|$)')
        if exact:
            # literal mode: the given names are the file list itself
            self._files = patterns
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            pm = _buildmatch(pats, '$')

        # Pick the cheapest callable for the combination of filters that
        # is actually present, so unused filters cost nothing per call.
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # nothing to filter on: everything matches
                    m = lambda f: True

        self.matchfn = m
        # set copy of the file list, used by exact() for membership tests
        self._fmap = set(self._files)

    def __call__(self, fn):
        """return the result of the matching function for fn"""
        return self.matchfn(fn)

    def __iter__(self):
        """iterate over the entries of the file list"""
        for f in self._files:
            yield f

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    def dir(self, f):
        """hook that does nothing in this class"""
        pass

    def missing(self, f):
        """hook that does nothing in this class"""
        pass

    def exact(self, f):
        """return True if f is one of the entries of the file list"""
        return f in self._fmap

    def rel(self, f):
        """return util.pathto(root, cwd, f) for this matcher's root and cwd"""
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        """return the file list computed at construction time"""
        return self._files

    def anypats(self):
        """return the any-patterns flag computed at construction time"""
        return self._anypats
Matt Mackall
|
r8568 | |||
Matt Mackall
|
class exact(match):
    """matcher whose file arguments are taken as literal names"""

    def __init__(self, root, cwd, files):
        # exact=True makes the base class use the names as-is instead of
        # parsing them as patterns
        match.__init__(self, root, cwd, patterns=files, exact=True)
Matt Mackall
|
r8585 | |||
class always(match):
    """matcher constructed with an empty pattern list and no filters"""

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
Matt Mackall
|
def patkind(pat):
    """return the recognized kind prefix of pat, or None if it has none"""
    kind, _name = _patsplit(pat, None)
    return kind
def _patsplit(pat, default):
    """Separate a pattern string into (kind, pattern).

    When the text before the first ':' is one of the known kinds, that
    kind and the remainder are returned.  Otherwise the whole string is
    returned unchanged, paired with default."""
    colon = pat.find(':')
    if colon >= 0:
        prefix = pat[:colon]
        if prefix in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre'):
            return prefix, pat[colon + 1:]
    return default, pat
Matt Mackall
|
def _globre(pat):
    """translate a glob pattern into regular expression source text

    '*' stays within one path component, '**' crosses '/', '?' is any
    single character, '[...]' is a character class ('!' negates),
    '{a,b}' is an alternation and a backslash quotes the next character.
    """
    quote = re.escape
    end = len(pat)
    pieces = []
    depth = 0  # number of '{' groups currently open
    pos = 0
    while pos < end:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            if pos < end and pat[pos] == '*':
                pos += 1
                pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif ch == '?':
            pieces.append('.')
        elif ch == '[':
            # look for the closing bracket; a leading '!' or ']' is part
            # of the class and cannot terminate it
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: the '[' is a literal character
                pieces.append('\\[')
            else:
                inner = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if inner[0] == '!':
                    inner = '^' + inner[1:]
                elif inner[0] == '^':
                    # a leading '^' is literal in a glob class
                    inner = '\\' + inner
                pieces.append('[%s]' % inner)
        elif ch == '{':
            depth += 1
            pieces.append('(?:')
        elif ch == '}' and depth:
            pieces.append(')')
            depth -= 1
        elif ch == ',' and depth:
            pieces.append('|')
        elif ch == '\\' and pos < end:
            # backslash quotes whatever character follows it
            pieces.append(quote(pat[pos]))
            pos += 1
        else:
            # ordinary character, or a special one with no special role
            # here ('}' / ',' outside a group, trailing backslash)
            pieces.append(quote(ch))
    return ''.join(pieces)
Matt Mackall
|
r8570 | |||
Matt Mackall
|
def _regex(kind, name, tail):
    '''return regular expression source for one (kind, name) pattern

    tail is appended only to glob-style patterns ('relglob' and any kind
    not handled explicitly below); an empty name yields an empty string.
    '''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'relre':
        # an explicitly anchored relre is used verbatim
        if name.startswith('^'):
            return name
        return '.*' + name
    if kind == 'path':
        return '^' + re.escape(name) + '(?:/|$)'
    if kind == 'relpath':
        return re.escape(name) + '(?:/|$)'
    globbed = _globre(name) + tail
    if kind == 'relglob':
        # may match at the top level or below any directory
        return '(?:|.*/)' + globbed
    return globbed
Matt Mackall
|
r8574 | |||
Matt Mackall
|
def _buildmatch(pats, tail):
    """return a match function for a list of (kind, name) patterns

    All patterns are joined into a single alternation and compiled.  If
    that is too large, the list is halved recursively and the two
    resulting matchers are or-ed together.  An invalid pattern raises
    util.Abort naming the offending entry.
    """
    try:
        sources = [_regex(kind, name, tail) for (kind, name) in pats]
        combined = '(?:%s)' % '|'.join(sources)
        if len(combined) > 20000:
            raise OverflowError()
        return re.compile(combined).match
    except OverflowError:
        # Either the length limit above tripped or the regex engine
        # itself overflowed: split the work in two until it fits.
        count = len(pats)
        if count < 2:
            raise
        half = count // 2
        first = _buildmatch(pats[:half], tail)
        second = _buildmatch(pats[half:], tail)
        return lambda s: first(s) or second(s)
    except re.error:
        # recompile one pattern at a time to report the culprit
        for kind, name in pats:
            try:
                re.compile('(?:%s)' % _regex(kind, name, tail))
            except re.error:
                raise util.Abort("invalid pattern (%s): %s" % (kind, name))
        raise util.Abort("invalid pattern")
Matt Mackall
|
def _normalize(names, default, root, cwd):
    """split each name into (kind, name) and normalize the name part

    'glob' and 'relpath' names go through util.canonpath(root, cwd, name),
    'relglob' and 'path' names through util.normpath(name); names of any
    other kind are kept as given.
    """
    # split everything up front, before any path handling takes place
    split = [_patsplit(p, default) for p in names]
    normalized = []
    for kind, name in split:
        if kind in ('relglob', 'path'):
            name = util.normpath(name)
        elif kind in ('glob', 'relpath'):
            name = util.canonpath(root, cwd, name)
        normalized.append((kind, name))
    return normalized
Matt Mackall
|
r8574 | |||
Matt Mackall
|
def _roots(patterns):
    """return one starting path per glob, path, relpath or relglob pattern

    Patterns of any other kind contribute nothing to the result.
    """
    def literalprefix(glob):
        # leading path components that contain no glob metacharacter
        kept = []
        for part in glob.split('/'):
            if '[' in part or '{' in part or '*' in part or '?' in part:
                break
            kept.append(part)
        return '/'.join(kept) or '.'

    found = []
    for kind, name in patterns:
        if kind == 'glob':
            found.append(literalprefix(name))
        elif kind == 'relglob':
            # an unrooted glob starts from the top
            found.append('.')
        elif kind in ('relpath', 'path'):
            found.append(name or '.')
    return found
def _anypats(patterns):
    """return True if any (kind, name) pattern is a glob or regexp kind

    The kinds counted are 'glob', 're', 'relglob' and 'relre'.  False is
    returned explicitly when none is found, so the result is always a
    bool rather than None falling off the end of the loop.
    """
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre'):
            return True
    return False