##// END OF EJS Templates
tests: run check-code on Python files without .py extension
tests: run check-code on Python files without .py extension

File last commit:

r18713:8728579f default
r19022:cba222f0 default
Show More
match.py
354 lines | 10.7 KiB | text/x-python | PythonLexer
timeless
Generally replace "file name" with "filename" in help and comments.
r8761 # match.py - filename matching
Martin Geisler
match: add copyright and license header
r8231 #
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
match: add copyright and license header
r8231
Alejandro Santos
split local and stdlib module imports (eases migration issues)
r9036 import re
Matt Mackall
match: introduce basic fileset support
r14675 import scmutil, util, fileset
Martin Geisler
match: mark error messages for translation
r12133 from i18n import _
Matt Mackall
walk: introduce match objects
r6576
Bryan O'Sullivan
matcher: use re2 bindings if available...
def _rematcher(pat):
    '''return a matching callable for the regular expression pat

    The pattern is compiled through util.compilere().  When the compiled
    object offers a test_match attribute (slightly faster, provided by
    facebook's re2 bindings) that is returned; otherwise its match
    attribute is used.
    '''
    compiled = util.compilere(pat)
    fastmatch = getattr(compiled, 'test_match', None)
    if fastmatch is None:
        return compiled.match
    return fastmatch
Matt Mackall
match: introduce basic fileset support
def _expandsets(pats, ctx):
    '''separate set: patterns from the rest and expand them to files

    pats is an iterable of (kind, expr) pairs.  Returns a pair
    (files, remaining): files is a set holding the union of
    fileset.getfileset(ctx, expr) over every 'set' pattern, and
    remaining is the list of all other patterns, in their original
    order, as (kind, expr) tuples.

    Raises util.Abort if a 'set' pattern is found while ctx is false.
    '''
    matched = set()
    remaining = []
    for kind, expr in pats:
        if kind != 'set':
            remaining.append((kind, expr))
        elif ctx:
            matched.update(fileset.getfileset(ctx, expr))
        else:
            raise util.Abort("fileset expression with no context")
    return matched, remaining
Matt Mackall
match: fold match into _match base class
class match(object):
    '''callable that decides whether a filename is selected

    Calling an instance with a filename returns the result of the
    matching function assembled by __init__ from the patterns and the
    include/exclude lists.  bad(), dir() and missing() do nothing by
    default.
    '''

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (None or empty for no restriction)
        exclude - patterns to exclude (None or empty for no restriction)
        default - if a pattern in patterns has no explicit type, assume
                  this one
        exact - patterns are actually literals
        auditor - handed to _normalize along with root and cwd
        ctx - context handed to _buildmatch, which needs it to expand
              'set:' patterns

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """
        # include/exclude default to None rather than a shared list
        # object; only their truthiness is consulted below.
        self._root = root
        self._cwd = cwd
        self._files = []
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # include and exclude entries without an explicit kind are globs
        if include:
            pats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
        if exclude:
            pats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
        if exact:
            # files() must always hand back a list object
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            pats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(pats)
            self._anypats = self._anypats or _anypats(pats)
            self.patternspat, pm = _buildmatch(ctx, pats, '$')

        # Pick the cheapest callable that combines only the matchers
        # that were actually built (im, em and pm exist only when the
        # corresponding argument was given).
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # no patterns, includes or excludes: match everything
                    m = lambda f: True
                    self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''return the result of the matching function for fn'''
        return self.matchfn(fn)

    def __iter__(self):
        '''iterate over the entries of files()'''
        for f in self._files:
            yield f

    def bad(self, f, msg):
        '''callback for each explicit file that can't be
        found/accessed, with an error message
        '''
        pass

    def dir(self, f):
        '''does nothing by default'''
        pass

    def missing(self, f):
        '''does nothing by default'''
        pass

    def exact(self, f):
        '''return True if f is one of the entries of files()'''
        return f in self._fmap

    def rel(self, f):
        '''return util.pathto() of f for this matcher's root and cwd'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''return the list of files computed at construction

        This is the patterns argument itself (copied into a list if
        needed) for exact matchers, and the result of _roots()
        otherwise.
        '''
        return self._files

    def anypats(self):
        '''return the flag recording whether non-literal patterns or
        include/exclude lists were given'''
        return self._anypats

    def always(self):
        '''return True if this matcher was built to match every file'''
        return self._always
Matt Mackall
match: refactor patkind...
r8568
Matt Mackall
match: add exact flag to match() to unify all match forms
class exact(match):
    '''matcher that treats the given files as literal names'''

    def __init__(self, root, cwd, files):
        super(exact, self).__init__(root, cwd, files, exact=True)
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
class always(match):
    '''matcher built without any patterns, flagged as always matching'''

    def __init__(self, root, cwd):
        nopatterns = []
        match.__init__(self, root, cwd, nopatterns)
        self._always = True
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
Martin Geisler
match: add narrowmatcher class...
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # root, cwd and the flags are taken over from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        skip = len(path) + 1
        narrowed = []
        for name in matcher._files:
            if name.startswith(prefix):
                narrowed.append(name[skip:])
        self._files = narrowed
        self._anypats = matcher._anypats

        def matchfn(fn):
            # put the prefix back before asking the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = matchfn
        self._fmap = set(self._files)

    def bad(self, f, msg):
        '''forward to the wrapped matcher's bad() with path prepended'''
        fullname = self._path + "/" + f
        self._matcher.bad(fullname, msg)
Matt Mackall
match: refactor patkind...
def patkind(pat):
    '''return the explicit kind prefix of pat, or None if it has none'''
    kind, rest = _patsplit(pat, None)
    return kind
def _patsplit(pat, default):
    """Split a string into an optional pattern kind prefix and the
    actual pattern.

    Returns (kind, pattern).  Only text before the first ':' that is a
    known kind counts as a prefix; anything else (a drive letter, for
    instance) leaves the string whole and yields (default, pat).
    """
    colon = pat.find(':')
    if colon >= 0:
        prefix = pat[:colon]
        if prefix in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                      'listfile', 'listfile0', 'set'):
            return prefix, pat[colon + 1:]
    return default, pat
Matt Mackall
match: remove head and tail args from _globre
def _globre(pat):
    '''convert a glob pattern into a regexp

    The translation emits no anchors:
    "*" matches within one path component and "**" matches anything,
    "?" matches one character, "[...]" becomes a character class
    (a leading "!" negates it), "{a,b}" becomes an alternation and a
    backslash quotes the following character.
    '''
    quote = re.escape
    end = len(pat)
    out = []
    depth = 0   # number of currently open "{" groups
    pos = 0
    while pos < end:
        ch = pat[pos]
        pos += 1
        if ch == '*':
            if pat[pos:pos + 1] == '*':
                pos += 1
                out.append('.*')
            else:
                out.append('[^/]*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # look for the closing bracket; a "!" or "]" directly after
            # the opening bracket belongs to the class
            close = pos
            if close < end and pat[close] in '!]':
                close += 1
            while close < end and pat[close] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat "[" as a literal
                out.append('\\[')
            else:
                inner = pat[pos:close].replace('\\', '\\\\')
                pos = close + 1
                if inner[0] == '!':
                    inner = '^' + inner[1:]
                elif inner[0] == '^':
                    inner = '\\' + inner
                out.append('[%s]' % inner)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\':
            if pos < end:
                out.append(quote(pat[pos]))
                pos += 1
            else:
                # trailing backslash stands for itself
                out.append(quote(ch))
        else:
            # ordinary characters, and "}" or "," outside of any group
            out.append(quote(ch))
    return ''.join(out)
Matt Mackall
match: move util match functions over
r8570
Matt Mackall
match: unnest functions in _matcher
def _regex(kind, name, tail):
    '''convert a pattern into a regular expression

    kind selects the translation applied to name; tail is appended to
    glob-style results only ('relglob' and every kind not listed
    below).  An empty name gives an empty string.
    '''
    if not name:
        return ''
    if kind == 're':
        return name
    if kind == 'path':
        return '^%s(?:/|$)' % re.escape(name)
    if kind == 'relpath':
        return '%s(?:/|$)' % re.escape(name)
    if kind == 'relre':
        # an anchored regexp is used as is; others may match anywhere
        if name.startswith('^'):
            return name
        return '.*' + name
    translated = _globre(name) + tail
    if kind == 'relglob':
        return '(?:|.*/)' + translated
    return translated
Matt Mackall
match: unnest functions in _matcher
r8574
Matt Mackall
match: introduce basic fileset support
def _buildmatch(ctx, pats, tail):
    '''return (regexp string, matching function) for the given patterns

    'set' patterns are expanded through _expandsets() and tested by
    membership; everything else goes through _buildregexmatch().  When
    only 'set' patterns (or none) are given the regexp string is empty.
    '''
    files, regexpats = _expandsets(pats, ctx)
    if not regexpats:
        return "", files.__contains__
    regex, rematch = _buildregexmatch(regexpats, tail)
    if not files:
        return regex, rematch

    def either(f):
        return f in files or rematch(f)
    return regex, either
def _buildregexmatch(pats, tail):
    """build a matching function from a set of patterns

    Returns (regexp string, matching function), where the string joins
    the translation of every pattern into one alternation.

    Raises util.Abort when a pattern does not compile, and re-raises
    OverflowError when a single pattern is still too big.
    """
    try:
        regexes = [_regex(kind, name, tail) for (kind, name) in pats]
        pat = '(?:%s)' % '|'.join(regexes)
        if len(pat) > 20000:
            raise OverflowError
        return pat, _rematcher(pat)
    except OverflowError:
        # The combined expression is too large for the regex engine:
        # halve the pattern list, build each half separately, and
        # accept a name matched by either half.
        count = len(pats)
        if count < 2:
            raise
        half = count // 2
        first = _buildregexmatch(pats[:half], tail)[1]
        second = _buildregexmatch(pats[half:], tail)[1]

        def either(s):
            return first(s) or second(s)
        return pat, either
    except re.error:
        # compile the patterns one at a time to report the broken one
        for kind, name in pats:
            try:
                _rematcher('(?:%s)' % _regex(kind, name, tail))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, name))
        raise util.Abort(_("invalid pattern"))
Matt Mackall
match: unnest functions in _matcher
r8574
Martin Geisler
match: accept auditor argument...
def _normalize(names, default, root, cwd, auditor):
    '''turn pattern strings into a list of (kind, name) pairs

    Every entry of names is split with _patsplit(), using default for
    entries without a kind.  'glob' and 'relpath' names are passed
    through scmutil.canonpath(), 'relglob' and 'path' names through
    util.normpath().  'listfile' and 'listfile0' entries are replaced
    by the normalized non-empty entries of the file they name (split on
    line ends and on NUL bytes respectively).

    Raises util.Abort if such a list file cannot be read.
    '''
    result = []
    # split everything up front, before any name is processed
    split = [_patsplit(entry, default) for entry in names]
    for kind, name in split:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(name)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % name)
            result.extend(_normalize(entries, default, root, cwd, auditor))
            continue

        if kind in ('glob', 'relpath'):
            name = scmutil.canonpath(root, cwd, name, auditor)
        elif kind in ('relglob', 'path'):
            name = util.normpath(name)
        result.append((kind, name))
    return result
Matt Mackall
match: unnest functions in _matcher
r8574
Matt Mackall
match: split up _normalizepats
def _roots(patterns):
    '''return the list of starting points implied by (kind, name) pairs

    A 'glob' contributes its leading path components that contain no
    glob character, 'relpath' and 'path' contribute the name itself and
    'relglob' contributes '.'.  An empty result for a pattern is
    reported as '.'.  Other kinds contribute nothing.
    '''
    roots = []
    for kind, name in patterns:
        if kind == 'relglob':
            roots.append('.')
        elif kind in ('relpath', 'path'):
            roots.append(name or '.')
        elif kind == 'glob':
            # find the non-glob prefix
            literal = []
            for part in name.split('/'):
                if [ch for ch in '[{*?' if ch in part]:
                    break
                literal.append(part)
            roots.append('/'.join(literal) or '.')
    return roots
def _anypats(patterns):
    '''return True if any (kind, name) pair is not a plain path

    The kinds 'glob', 're', 'relglob', 'relre' and 'set' count as real
    patterns.  Returns False (rather than falling off the end and
    yielding None) when there are none, so callers always get a bool.
    '''
    for kind, name in patterns:
        if kind in ('glob', 're', 'relglob', 'relre', 'set'):
            return True
    return False