##// END OF EJS Templates
branchmap: stop useless rev -> node -> rev round trip...
branchmap: stop useless rev -> node -> rev round trip We never use the node of new revisions unless in the very specific case of closed heads. So we can just use the revision number. So give another handfull of percent speedup.

File last commit:

r20033:f9628707 default
r20262:cf450ee3 default
Show More
match.py
358 lines | 10.9 KiB | text/x-python | PythonLexer
timeless
Generally replace "file name" with "filename" in help and comments.
r8761 # match.py - filename matching
Martin Geisler
match: add copyright and license header
r8231 #
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
match: add copyright and license header
r8231
Alejandro Santos
split local and stdlib module imports (eases migration issues)
r9036 import re
Augie Fackler
pathutil: tease out a new library to break an import cycle from canonpath use
r20033 import util, fileset, pathutil
Martin Geisler
match: mark error messages for translation
r12133 from i18n import _
Matt Mackall
walk: introduce match objects
r6576
Bryan O'Sullivan
matcher: use re2 bindings if available...
r16943 def _rematcher(pat):
m = util.compilere(pat)
try:
# slightly faster, provided by facebook's re2 bindings
return m.test_match
except AttributeError:
return m.match
Matt Mackall
match: introduce basic fileset support
r14675 def _expandsets(pats, ctx):
'''convert set: patterns into a list of files in the given context'''
fset = set()
other = []
for kind, expr in pats:
if kind == 'set':
if not ctx:
raise util.Abort("fileset expression with no context")
s = fileset.getfileset(ctx, expr)
fset.update(s)
continue
other.append((kind, expr))
return fset, other
Matt Mackall
match: fold match into _match base class
r8587 class match(object):
Matt Mackall
match: add some default args
r8567 def __init__(self, root, cwd, patterns, include=[], exclude=[],
Matt Mackall
match: allow passing a context object to match core
r14674 default='glob', exact=False, auditor=None, ctx=None):
Matt Mackall
match: fold _matcher into match.__init__
r8581 """build an object to match a set of file patterns
arguments:
root - the canonical root of the tree you're matching against
cwd - the current working directory, if relevant
patterns - patterns to find
include - patterns to include
exclude - patterns to exclude
default - if a pattern in names has no explicit type, assume this one
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 exact - patterns are actually literals
Matt Mackall
match: fold _matcher into match.__init__
r8581
a pattern is one of:
'glob:<glob>' - a glob relative to cwd
're:<regexp>' - a regular expression
Mads Kiilerich
fix wording and not-completely-trivial spelling errors and bad docstrings
r17425 'path:<path>' - a path relative to repository root
Matt Mackall
match: fold _matcher into match.__init__
r8581 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
'relpath:<path>' - a path relative to cwd
Matt Mackall
match: fold match into _match base class
r8587 'relre:<regexp>' - a regexp that needn't match the start of a name
Matt Mackall
match: introduce basic fileset support
r14675 'set:<fileset>' - a fileset expression
Matt Mackall
match: fold match into _match base class
r8587 '<something>' - a pattern of the specified default type
Matt Mackall
match: fold _matcher into match.__init__
r8581 """
Matt Mackall
match: fold match into _match base class
r8587 self._root = root
self._cwd = cwd
self._files = []
self._anypats = bool(include or exclude)
Matt Mackall
match: allow passing a context object to match core
r14674 self._ctx = ctx
Bryan O'Sullivan
match: more accurately report when we're always going to match...
r18713 self._always = False
Matt Mackall
match: fold _matcher into match.__init__
r8581
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 if include:
jfh
add debugignore which yields the combined ignore patten of the .hgignore files...
r13396 pats = _normalize(include, 'glob', root, cwd, auditor)
Matt Mackall
match: introduce basic fileset support
r14675 self.includepat, im = _buildmatch(ctx, pats, '(?:/|$)')
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 if exclude:
jfh
add debugignore which yields the combined ignore patten of the .hgignore files...
r13396 pats = _normalize(exclude, 'glob', root, cwd, auditor)
Matt Mackall
match: introduce basic fileset support
r14675 self.excludepat, em = _buildmatch(ctx, pats, '(?:/|$)')
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 if exact:
FUJIWARA Katsunori
match: make 'match.files()' return list object always...
r16789 if isinstance(patterns, list):
self._files = patterns
else:
self._files = list(patterns)
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 pm = self.exact
elif patterns:
Martin Geisler
match: accept auditor argument...
r12163 pats = _normalize(patterns, default, root, cwd, auditor)
Matt Mackall
match: fold match into _match base class
r8587 self._files = _roots(pats)
self._anypats = self._anypats or _anypats(pats)
Matt Mackall
match: introduce basic fileset support
r14675 self.patternspat, pm = _buildmatch(ctx, pats, '$')
Matt Mackall
match: fold _matcher into match.__init__
r8581
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 if patterns or exact:
Matt Mackall
match: fold _matcher into match.__init__
r8581 if include:
if exclude:
m = lambda f: im(f) and not em(f) and pm(f)
else:
m = lambda f: im(f) and pm(f)
else:
if exclude:
m = lambda f: not em(f) and pm(f)
else:
m = pm
else:
if include:
if exclude:
m = lambda f: im(f) and not em(f)
else:
m = im
else:
if exclude:
m = lambda f: not em(f)
else:
m = lambda f: True
Bryan O'Sullivan
match: more accurately report when we're always going to match...
r18713 self._always = True
Matt Mackall
match: fold _matcher into match.__init__
r8581
Matt Mackall
match: fold match into _match base class
r8587 self.matchfn = m
self._fmap = set(self._files)
def __call__(self, fn):
return self.matchfn(fn)
def __iter__(self):
for f in self._files:
yield f
def bad(self, f, msg):
Matt Mackall
match: document bad callback semantics
r8678 '''callback for each explicit file that can't be
found/accessed, with an error message
'''
Matt Mackall
match: ignore return of match.bad...
r8680 pass
Siddharth Agarwal
match: add comments to explain explicitdir and traversedir
r19144 # If this is set, it will be called when an explicitly listed directory is
# visited.
Siddharth Agarwal
match: make explicitdir and traversedir None by default...
r19143 explicitdir = None
Siddharth Agarwal
match: add comments to explain explicitdir and traversedir
r19144 # If this is set, it will be called when a directory discovered by recursive
# traversal is visited.
Siddharth Agarwal
match: make explicitdir and traversedir None by default...
r19143 traversedir = None
Matt Mackall
match: fold match into _match base class
r8587 def missing(self, f):
pass
def exact(self, f):
return f in self._fmap
def rel(self, f):
return util.pathto(self._root, self._cwd, f)
def files(self):
return self._files
def anypats(self):
return self._anypats
Jesse Glick
localrepo: optimize internode status calls using match.always...
r16645 def always(self):
Bryan O'Sullivan
match: more accurately report when we're always going to match...
r18713 return self._always
Matt Mackall
match: refactor patkind...
r8568
Matt Mackall
match: add exact flag to match() to unify all match forms
r8586 class exact(match):
Matt Mackall
match: redefine always and never in terms of match and exact
r8585 def __init__(self, root, cwd, files):
Mads Kiilerich
check-code: check for spaces around = for named parameters
r19872 match.__init__(self, root, cwd, files, exact=True)
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
class always(match):
def __init__(self, root, cwd):
match.__init__(self, root, cwd, [])
Bryan O'Sullivan
match: more accurately report when we're always going to match...
r18713 self._always = True
Matt Mackall
match: redefine always and never in terms of match and exact
r8585
Martin Geisler
match: add narrowmatcher class...
r12165 class narrowmatcher(match):
"""Adapt a matcher to work on a subdirectory only.
The paths are remapped to remove/insert the path as needed:
>>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
>>> m2 = narrowmatcher('sub', m1)
>>> bool(m2('a.txt'))
False
>>> bool(m2('b.txt'))
True
>>> bool(m2.matchfn('a.txt'))
False
>>> bool(m2.matchfn('b.txt'))
True
>>> m2.files()
['b.txt']
>>> m2.exact('b.txt')
True
Martin Geisler
narrowmatcher: fix broken rel method
r12267 >>> m2.rel('b.txt')
'b.txt'
Martin Geisler
narrowmatcher: propagate bad method...
r12268 >>> def bad(f, msg):
... print "%s: %s" % (f, msg)
>>> m1.bad = bad
>>> m2.bad('x.txt', 'No such file')
sub/x.txt: No such file
Martin Geisler
match: add narrowmatcher class...
r12165 """
def __init__(self, path, matcher):
Martin Geisler
narrowmatcher: fix broken rel method
r12267 self._root = matcher._root
self._cwd = matcher._cwd
Martin Geisler
match: add narrowmatcher class...
r12165 self._path = path
self._matcher = matcher
Bryan O'Sullivan
match: more accurately report when we're always going to match...
r18713 self._always = matcher._always
Martin Geisler
match: add narrowmatcher class...
r12165
self._files = [f[len(path) + 1:] for f in matcher._files
if f.startswith(path + "/")]
self._anypats = matcher._anypats
self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
self._fmap = set(self._files)
Martin Geisler
narrowmatcher: propagate bad method...
r12268 def bad(self, f, msg):
self._matcher.bad(self._path + "/" + f, msg)
Matt Mackall
match: refactor patkind...
r8568 def patkind(pat):
Matt Mackall
match: move util match functions over
r8570 return _patsplit(pat, None)[0]
def _patsplit(pat, default):
"""Split a string into an optional pattern kind prefix and the
actual pattern."""
Matt Mackall
match: optimize _patsplit
r8579 if ':' in pat:
Matt Mackall
match: fix _patsplit breakage with drive letters
r8613 kind, val = pat.split(':', 1)
Steve Borho
match: support reading pattern lists from files
r13218 if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
Matt Mackall
match: introduce basic fileset support
r14675 'listfile', 'listfile0', 'set'):
Matt Mackall
match: fix _patsplit breakage with drive letters
r8613 return kind, val
Matt Mackall
match: move util match functions over
r8570 return default, pat
Matt Mackall
match: remove head and tail args from _globre
r8582 def _globre(pat):
Matt Mackall
match: move util match functions over
r8570 "convert a glob pattern into a regexp"
i, n = 0, len(pat)
res = ''
group = 0
Matt Mackall
match: optimize escaping in _globre...
r8583 escape = re.escape
Matt Mackall
many, many trivial check-code fixups
r10282 def peek():
return i < n and pat[i]
Matt Mackall
match: move util match functions over
r8570 while i < n:
c = pat[i]
Matt Mackall
many, many trivial check-code fixups
r10282 i += 1
Matt Mackall
match: optimize escaping in _globre...
r8583 if c not in '*?[{},\\':
res += escape(c)
elif c == '*':
Matt Mackall
match: move util match functions over
r8570 if peek() == '*':
i += 1
res += '.*'
else:
res += '[^/]*'
elif c == '?':
res += '.'
elif c == '[':
j = i
if j < n and pat[j] in '!]':
j += 1
while j < n and pat[j] != ']':
j += 1
if j >= n:
res += '\\['
else:
stuff = pat[i:j].replace('\\','\\\\')
i = j + 1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] == '^':
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
elif c == '{':
group += 1
res += '(?:'
elif c == '}' and group:
res += ')'
group -= 1
elif c == ',' and group:
res += '|'
elif c == '\\':
p = peek()
if p:
i += 1
Matt Mackall
match: optimize escaping in _globre...
r8583 res += escape(p)
Matt Mackall
match: move util match functions over
r8570 else:
Matt Mackall
match: optimize escaping in _globre...
r8583 res += escape(c)
Matt Mackall
match: move util match functions over
r8570 else:
Matt Mackall
match: optimize escaping in _globre...
r8583 res += escape(c)
Matt Mackall
match: remove head and tail args from _globre
r8582 return res
Matt Mackall
match: move util match functions over
r8570
Matt Mackall
match: unnest functions in _matcher
r8574 def _regex(kind, name, tail):
'''convert a pattern into a regular expression'''
if not name:
return ''
if kind == 're':
return name
elif kind == 'path':
return '^' + re.escape(name) + '(?:/|$)'
elif kind == 'relglob':
Matt Mackall
match: remove head and tail args from _globre
r8582 return '(?:|.*/)' + _globre(name) + tail
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind == 'relpath':
return re.escape(name) + '(?:/|$)'
elif kind == 'relre':
if name.startswith('^'):
return name
return '.*' + name
Matt Mackall
match: remove head and tail args from _globre
r8582 return _globre(name) + tail
Matt Mackall
match: unnest functions in _matcher
r8574
Matt Mackall
match: introduce basic fileset support
r14675 def _buildmatch(ctx, pats, tail):
fset, pats = _expandsets(pats, ctx)
if not pats:
return "", fset.__contains__
pat, mf = _buildregexmatch(pats, tail)
if fset:
return pat, lambda f: f in fset or mf(f)
return pat, mf
def _buildregexmatch(pats, tail):
Matt Mackall
match: unnest functions in _matcher
r8574 """build a matching function from a set of patterns"""
try:
pat = '(?:%s)' % '|'.join([_regex(k, p, tail) for (k, p) in pats])
if len(pat) > 20000:
Brodie Rao
cleanup: "raise SomeException()" -> "raise SomeException"
r16687 raise OverflowError
Bryan O'Sullivan
matcher: use re2 bindings if available...
r16943 return pat, _rematcher(pat)
Matt Mackall
match: unnest functions in _matcher
r8574 except OverflowError:
# We're using a Python with a tiny regex engine and we
# made it explode, so we'll divide the pattern list in two
# until it works
l = len(pats)
if l < 2:
raise
Peter Arrenbrecht
match: fix bug caused by refactoring in cfc89398f710
r14722 pata, a = _buildregexmatch(pats[:l//2], tail)
patb, b = _buildregexmatch(pats[l//2:], tail)
jfh
add debugignore which yields the combined ignore patten of the .hgignore files...
r13396 return pat, lambda s: a(s) or b(s)
Matt Mackall
match: unnest functions in _matcher
r8574 except re.error:
for k, p in pats:
try:
Bryan O'Sullivan
matcher: use re2 bindings if available...
r16943 _rematcher('(?:%s)' % _regex(k, p, tail))
Matt Mackall
match: unnest functions in _matcher
r8574 except re.error:
Martin Geisler
match: mark error messages for translation
r12133 raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
raise util.Abort(_("invalid pattern"))
Matt Mackall
match: unnest functions in _matcher
r8574
Martin Geisler
match: accept auditor argument...
r12163 def _normalize(names, default, root, cwd, auditor):
Matt Mackall
match: unnest functions in _matcher
r8574 pats = []
for kind, name in [_patsplit(p, default) for p in names]:
if kind in ('glob', 'relpath'):
Augie Fackler
pathutil: tease out a new library to break an import cycle from canonpath use
r20033 name = pathutil.canonpath(root, cwd, name, auditor)
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind in ('relglob', 'path'):
name = util.normpath(name)
Steve Borho
match: support reading pattern lists from files
r13218 elif kind in ('listfile', 'listfile0'):
try:
Patrick Mezard
match: make 'listfile:' split on LF and CRLF...
r14248 files = util.readfile(name)
if kind == 'listfile0':
files = files.split('\0')
else:
files = files.splitlines()
Steve Borho
match: support reading pattern lists from files
r13218 files = [f for f in files if f]
except EnvironmentError:
raise util.Abort(_("unable to read file list (%s)") % name)
pats += _normalize(files, default, root, cwd, auditor)
continue
Matt Mackall
match: unnest functions in _matcher
r8574
pats.append((kind, name))
Matt Mackall
match: split up _normalizepats
r8576 return pats
Matt Mackall
match: unnest functions in _matcher
r8574
Matt Mackall
match: split up _normalizepats
r8576 def _roots(patterns):
r = []
for kind, name in patterns:
Matt Mackall
match: fold _globprefix into _roots
r8584 if kind == 'glob': # find the non-glob prefix
root = []
for p in name.split('/'):
if '[' in p or '{' in p or '*' in p or '?' in p:
break
root.append(p)
r.append('/'.join(root) or '.')
Matt Mackall
match: unnest functions in _matcher
r8574 elif kind in ('relpath', 'path'):
Matt Mackall
match: split up _normalizepats
r8576 r.append(name or '.')
Mads Kiilerich
match: fix root calculation for combining regexps with simple paths...
r19107 else: # relglob, re, relre
Matt Mackall
match: split up _normalizepats
r8576 r.append('.')
return r
def _anypats(patterns):
for kind, name in patterns:
Patrick Mezard
match: consider filesets as "anypats"...
r16182 if kind in ('glob', 're', 'relglob', 'relre', 'set'):
Matt Mackall
match: split up _normalizepats
r8576 return True