match.py
656 lines
| 22.7 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Alejandro Santos
|
r9036 | import re | ||
Augie Fackler
|
r20401 | import util, pathutil | ||
Martin Geisler
|
r12133 | from i18n import _ | ||
Matt Mackall
|
r6576 | |||
Drew Gottlieb
|
r24636 | propertycache = util.propertycache | ||
Mads Kiilerich
|
r21111 | def _rematcher(regex): | ||
'''compile the regexp with the best available regexp engine and return a | ||||
matcher function''' | ||||
Siddharth Agarwal
|
r21909 | m = util.re.compile(regex) | ||
Bryan O'Sullivan
|
r16943 | try: | ||
# slightly faster, provided by facebook's re2 bindings | ||||
return m.test_match | ||||
except AttributeError: | ||||
return m.match | ||||
Matt Harbison
|
r25122 | def _expandsets(kindpats, ctx, listsubrepos): | ||
Mads Kiilerich
|
r21111 | '''Returns the kindpats list with the 'set' patterns expanded.''' | ||
Matt Mackall
|
r14675 | fset = set() | ||
other = [] | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r14675 | if kind == 'set': | ||
if not ctx: | ||||
raise util.Abort("fileset expression with no context") | ||||
Mads Kiilerich
|
r21111 | s = ctx.getfileset(pat) | ||
Matt Mackall
|
r14675 | fset.update(s) | ||
Matt Harbison
|
r25122 | |||
if listsubrepos: | ||||
for subpath in ctx.substate: | ||||
s = ctx.sub(subpath).getfileset(pat) | ||||
fset.update(subpath + '/' + f for f in s) | ||||
Matt Mackall
|
r14675 | continue | ||
Durham Goode
|
r25213 | other.append((kind, pat, source)) | ||
Matt Mackall
|
r14675 | return fset, other | ||
Durham Goode
|
r25283 | def _expandsubinclude(kindpats, root): | ||
'''Returns the list of subinclude matchers and the kindpats without the | ||||
subincludes in it.''' | ||||
relmatchers = [] | ||||
other = [] | ||||
for kind, pat, source in kindpats: | ||||
if kind == 'subinclude': | ||||
Matt Harbison
|
r25301 | sourceroot = pathutil.dirname(util.normpath(source)) | ||
Durham Goode
|
r25283 | pat = util.pconvert(pat) | ||
path = pathutil.join(sourceroot, pat) | ||||
newroot = pathutil.dirname(path) | ||||
relmatcher = match(newroot, '', [], ['include:%s' % path]) | ||||
prefix = pathutil.canonpath(root, root, newroot) | ||||
if prefix: | ||||
prefix += '/' | ||||
relmatchers.append((prefix, relmatcher)) | ||||
else: | ||||
other.append((kind, pat, source)) | ||||
return relmatchers, other | ||||
Martin von Zweigbergk
|
r24447 | def _kindpatsalwaysmatch(kindpats): | ||
""""Checks whether the kindspats match everything, as e.g. | ||||
'relpath:.' does. | ||||
""" | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Martin von Zweigbergk
|
r24447 | if pat != '' or kind not in ['relpath', 'glob']: | ||
return False | ||||
return True | ||||
Matt Mackall
|
r8587 | class match(object): | ||
Matt Mackall
|
r8567 | def __init__(self, root, cwd, patterns, include=[], exclude=[], | ||
Matt Harbison
|
r25122 | default='glob', exact=False, auditor=None, ctx=None, | ||
Durham Goode
|
r25214 | listsubrepos=False, warn=None): | ||
Matt Mackall
|
r8581 | """build an object to match a set of file patterns | ||
arguments: | ||||
root - the canonical root of the tree you're matching against | ||||
cwd - the current working directory, if relevant | ||||
patterns - patterns to find | ||||
Mads Kiilerich
|
r21111 | include - patterns to include (unless they are excluded) | ||
exclude - patterns to exclude (even if they are included) | ||||
default - if a pattern in patterns has no explicit type, assume this one | ||||
exact - patterns are actually filenames (include/exclude still apply) | ||||
Durham Goode
|
r25214 | warn - optional function used for printing warnings | ||
Matt Mackall
|
r8581 | |||
a pattern is one of: | ||||
'glob:<glob>' - a glob relative to cwd | ||||
're:<regexp>' - a regular expression | ||||
Mads Kiilerich
|
r17425 | 'path:<path>' - a path relative to repository root | ||
Matt Mackall
|
r8581 | 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs) | ||
'relpath:<path>' - a path relative to cwd | ||||
Matt Mackall
|
r8587 | 'relre:<regexp>' - a regexp that needn't match the start of a name | ||
Matt Mackall
|
r14675 | 'set:<fileset>' - a fileset expression | ||
Durham Goode
|
r25215 | 'include:<path>' - a file of patterns to read and include | ||
Durham Goode
|
r25283 | 'subinclude:<path>' - a file of patterns to match against files under | ||
the same directory | ||||
Matt Mackall
|
r8587 | '<something>' - a pattern of the specified default type | ||
Matt Mackall
|
r8581 | """ | ||
Matt Mackall
|
r8587 | self._root = root | ||
self._cwd = cwd | ||||
Mads Kiilerich
|
r21079 | self._files = [] # exact files and roots of patterns | ||
Matt Mackall
|
r8587 | self._anypats = bool(include or exclude) | ||
Bryan O'Sullivan
|
r18713 | self._always = False | ||
Matt Harbison
|
r23480 | self._pathrestricted = bool(include or exclude or patterns) | ||
Durham Goode
|
r25214 | self._warn = warn | ||
Drew Gottlieb
|
r25231 | self._includeroots = set() | ||
self._includedirs = set(['.']) | ||||
self._excluderoots = set() | ||||
Matt Mackall
|
r8581 | |||
Martin von Zweigbergk
|
r22513 | matchfns = [] | ||
Matt Mackall
|
r8586 | if include: | ||
Matt Harbison
|
r24789 | kindpats = self._normalize(include, 'glob', root, cwd, auditor) | ||
Matt Harbison
|
r25122 | self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)', | ||
Durham Goode
|
r25238 | listsubrepos, root) | ||
Drew Gottlieb
|
r25231 | self._includeroots.update(_roots(kindpats)) | ||
self._includeroots.discard('.') | ||||
self._includedirs.update(util.dirs(self._includeroots)) | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(im) | ||
Matt Mackall
|
r8586 | if exclude: | ||
Matt Harbison
|
r24789 | kindpats = self._normalize(exclude, 'glob', root, cwd, auditor) | ||
Matt Harbison
|
r25122 | self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)', | ||
Durham Goode
|
r25238 | listsubrepos, root) | ||
Martin von Zweigbergk
|
r25362 | if not _anypats(kindpats): | ||
self._excluderoots.update(_roots(kindpats)) | ||||
self._excluderoots.discard('.') | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(lambda f: not em(f)) | ||
Matt Mackall
|
r8586 | if exact: | ||
FUJIWARA Katsunori
|
r16789 | if isinstance(patterns, list): | ||
self._files = patterns | ||||
else: | ||||
self._files = list(patterns) | ||||
Martin von Zweigbergk
|
r22513 | matchfns.append(self.exact) | ||
Matt Mackall
|
r8586 | elif patterns: | ||
Matt Harbison
|
r24789 | kindpats = self._normalize(patterns, default, root, cwd, auditor) | ||
Martin von Zweigbergk
|
r24447 | if not _kindpatsalwaysmatch(kindpats): | ||
self._files = _roots(kindpats) | ||||
self._anypats = self._anypats or _anypats(kindpats) | ||||
Matt Harbison
|
r25122 | self.patternspat, pm = _buildmatch(ctx, kindpats, '$', | ||
Durham Goode
|
r25238 | listsubrepos, root) | ||
Martin von Zweigbergk
|
r24447 | matchfns.append(pm) | ||
Matt Mackall
|
r8581 | |||
Martin von Zweigbergk
|
r22513 | if not matchfns: | ||
m = util.always | ||||
self._always = True | ||||
elif len(matchfns) == 1: | ||||
m = matchfns[0] | ||||
Matt Mackall
|
r8581 | else: | ||
Martin von Zweigbergk
|
r22513 | def m(f): | ||
for matchfn in matchfns: | ||||
if not matchfn(f): | ||||
return False | ||||
return True | ||||
Matt Mackall
|
r8581 | |||
Matt Mackall
|
r8587 | self.matchfn = m | ||
Drew Gottlieb
|
r25189 | self._fileroots = set(self._files) | ||
Matt Mackall
|
r8587 | |||
def __call__(self, fn): | ||||
return self.matchfn(fn) | ||||
def __iter__(self): | ||||
for f in self._files: | ||||
yield f | ||||
Mads Kiilerich
|
r21111 | |||
# Callbacks related to how the matcher is used by dirstate.walk. | ||||
# Subscribers to these events must monkeypatch the matcher object. | ||||
Matt Mackall
|
r8587 | def bad(self, f, msg): | ||
Mads Kiilerich
|
r21111 | '''Callback from dirstate.walk for each explicit file that can't be | ||
found/accessed, with an error message.''' | ||||
Matt Mackall
|
r8680 | pass | ||
Mads Kiilerich
|
r21111 | |||
# If an explicitdir is set, it will be called when an explicitly listed | ||||
# directory is visited. | ||||
Siddharth Agarwal
|
r19143 | explicitdir = None | ||
Mads Kiilerich
|
r21111 | |||
# If an traversedir is set, it will be called when a directory discovered | ||||
# by recursive traversal is visited. | ||||
Siddharth Agarwal
|
r19143 | traversedir = None | ||
Mads Kiilerich
|
r21111 | |||
Matt Harbison
|
r23685 | def abs(self, f): | ||
'''Convert a repo path back to path that is relative to the root of the | ||||
matcher.''' | ||||
return f | ||||
Matt Mackall
|
r8587 | def rel(self, f): | ||
Mads Kiilerich
|
r21111 | '''Convert repo path back to path that is relative to cwd of matcher.''' | ||
Matt Mackall
|
r8587 | return util.pathto(self._root, self._cwd, f) | ||
Mads Kiilerich
|
r21111 | |||
Matt Harbison
|
r23480 | def uipath(self, f): | ||
'''Convert repo path to a display path. If patterns or -I/-X were used | ||||
to create this matcher, the display path will be relative to cwd. | ||||
Otherwise it is relative to the root of the repo.''' | ||||
Matt Harbison
|
r23686 | return (self._pathrestricted and self.rel(f)) or self.abs(f) | ||
Matt Harbison
|
r23480 | |||
Matt Mackall
|
r8587 | def files(self): | ||
Mads Kiilerich
|
r21111 | '''Explicitly listed files or patterns or roots: | ||
if no patterns or .always(): empty list, | ||||
if exact: list exact files, | ||||
if not .anypats(): list all files and dirs, | ||||
else: optimal roots''' | ||||
Matt Mackall
|
r8587 | return self._files | ||
Mads Kiilerich
|
r21111 | |||
Drew Gottlieb
|
r24636 | @propertycache | ||
def _dirs(self): | ||||
Drew Gottlieb
|
r25189 | return set(util.dirs(self._fileroots)) | set(['.']) | ||
Drew Gottlieb
|
r24636 | |||
def visitdir(self, dir): | ||||
Drew Gottlieb
|
r25231 | '''Decides whether a directory should be visited based on whether it | ||
has potential matches in it or one of its subdirectories. This is | ||||
based on the match's primary, included, and excluded patterns. | ||||
This function's behavior is undefined if it has returned False for | ||||
one of the dir's parent directories. | ||||
''' | ||||
if dir in self._excluderoots: | ||||
return False | ||||
parentdirs = None | ||||
if (self._includeroots and dir not in self._includeroots and | ||||
dir not in self._includedirs): | ||||
Drew Gottlieb
|
r25250 | parentdirs = list(util.finddirs(dir)) | ||
Drew Gottlieb
|
r25231 | if not any(parent in self._includeroots for parent in parentdirs): | ||
return False | ||||
Drew Gottlieb
|
r25189 | return (not self._fileroots or '.' in self._fileroots or | ||
dir in self._fileroots or dir in self._dirs or | ||||
any(parentdir in self._fileroots | ||||
Drew Gottlieb
|
r25231 | for parentdir in parentdirs or util.finddirs(dir))) | ||
Drew Gottlieb
|
r24636 | |||
Mads Kiilerich
|
r21111 | def exact(self, f): | ||
'''Returns True if f is in .files().''' | ||||
Drew Gottlieb
|
r25189 | return f in self._fileroots | ||
Mads Kiilerich
|
r21111 | |||
Matt Mackall
|
r8587 | def anypats(self): | ||
Mads Kiilerich
|
r21111 | '''Matcher uses patterns or include/exclude.''' | ||
Matt Mackall
|
r8587 | return self._anypats | ||
Mads Kiilerich
|
r21111 | |||
Jesse Glick
|
r16645 | def always(self): | ||
Mads Kiilerich
|
r21111 | '''Matcher will match everything and .files() will be empty | ||
- optimization might be possible and necessary.''' | ||||
Bryan O'Sullivan
|
r18713 | return self._always | ||
Matt Mackall
|
r8568 | |||
Drew Gottlieb
|
r25114 | def ispartial(self): | ||
'''True if the matcher won't always match. | ||||
Although it's just the inverse of _always in this implementation, | ||||
an extenion such as narrowhg might make it return something | ||||
slightly different.''' | ||||
return not self._always | ||||
Martin von Zweigbergk
|
r24448 | def isexact(self): | ||
return self.matchfn == self.exact | ||||
Martin von Zweigbergk
|
r25233 | def prefix(self): | ||
return not self.always() and not self.isexact() and not self.anypats() | ||||
Matt Harbison
|
r24789 | def _normalize(self, patterns, default, root, cwd, auditor): | ||
'''Convert 'kind:pat' from the patterns list to tuples with kind and | ||||
normalized and rooted patterns and with listfiles expanded.''' | ||||
kindpats = [] | ||||
for kind, pat in [_patsplit(p, default) for p in patterns]: | ||||
if kind in ('glob', 'relpath'): | ||||
pat = pathutil.canonpath(root, cwd, pat, auditor) | ||||
elif kind in ('relglob', 'path'): | ||||
pat = util.normpath(pat) | ||||
elif kind in ('listfile', 'listfile0'): | ||||
try: | ||||
files = util.readfile(pat) | ||||
if kind == 'listfile0': | ||||
files = files.split('\0') | ||||
else: | ||||
files = files.splitlines() | ||||
files = [f for f in files if f] | ||||
except EnvironmentError: | ||||
raise util.Abort(_("unable to read file list (%s)") % pat) | ||||
Durham Goode
|
r25213 | for k, p, source in self._normalize(files, default, root, cwd, | ||
auditor): | ||||
kindpats.append((k, p, pat)) | ||||
Matt Harbison
|
r24789 | continue | ||
Durham Goode
|
r25215 | elif kind == 'include': | ||
try: | ||||
includepats = readpatternfile(pat, self._warn) | ||||
for k, p, source in self._normalize(includepats, default, | ||||
root, cwd, auditor): | ||||
kindpats.append((k, p, source or pat)) | ||||
except util.Abort, inst: | ||||
raise util.Abort('%s: %s' % (pat, inst[0])) | ||||
except IOError, inst: | ||||
if self._warn: | ||||
self._warn(_("skipping unreadable pattern file " | ||||
"'%s': %s\n") % (pat, inst.strerror)) | ||||
continue | ||||
Matt Harbison
|
r24789 | # else: re or relre - which cannot be normalized | ||
Durham Goode
|
r25213 | kindpats.append((kind, pat, '')) | ||
Matt Harbison
|
r24789 | return kindpats | ||
Martin von Zweigbergk
|
r23549 | def exact(root, cwd, files): | ||
return match(root, cwd, files, exact=True) | ||||
Matt Mackall
|
r8585 | |||
Martin von Zweigbergk
|
r23549 | def always(root, cwd): | ||
return match(root, cwd, []) | ||||
Matt Mackall
|
r8585 | |||
Martin Geisler
|
r12165 | class narrowmatcher(match): | ||
"""Adapt a matcher to work on a subdirectory only. | ||||
The paths are remapped to remove/insert the path as needed: | ||||
>>> m1 = match('root', '', ['a.txt', 'sub/b.txt']) | ||||
>>> m2 = narrowmatcher('sub', m1) | ||||
>>> bool(m2('a.txt')) | ||||
False | ||||
>>> bool(m2('b.txt')) | ||||
True | ||||
>>> bool(m2.matchfn('a.txt')) | ||||
False | ||||
>>> bool(m2.matchfn('b.txt')) | ||||
True | ||||
>>> m2.files() | ||||
['b.txt'] | ||||
>>> m2.exact('b.txt') | ||||
True | ||||
Matt Harbison
|
r23686 | >>> util.pconvert(m2.rel('b.txt')) | ||
'sub/b.txt' | ||||
Martin Geisler
|
r12268 | >>> def bad(f, msg): | ||
... print "%s: %s" % (f, msg) | ||||
>>> m1.bad = bad | ||||
>>> m2.bad('x.txt', 'No such file') | ||||
sub/x.txt: No such file | ||||
Matt Harbison
|
r23685 | >>> m2.abs('c.txt') | ||
'sub/c.txt' | ||||
Martin Geisler
|
r12165 | """ | ||
def __init__(self, path, matcher): | ||||
Martin Geisler
|
r12267 | self._root = matcher._root | ||
self._cwd = matcher._cwd | ||||
Martin Geisler
|
r12165 | self._path = path | ||
self._matcher = matcher | ||||
Bryan O'Sullivan
|
r18713 | self._always = matcher._always | ||
Matt Harbison
|
r23480 | self._pathrestricted = matcher._pathrestricted | ||
Martin Geisler
|
r12165 | |||
self._files = [f[len(path) + 1:] for f in matcher._files | ||||
if f.startswith(path + "/")] | ||||
Matt Harbison
|
r25194 | |||
# If the parent repo had a path to this subrepo and no patterns are | ||||
# specified, this submatcher always matches. | ||||
if not self._always and not matcher._anypats: | ||||
Matt Mackall
|
r25195 | self._always = any(f == path for f in matcher._files) | ||
Matt Harbison
|
r25194 | |||
Martin Geisler
|
r12165 | self._anypats = matcher._anypats | ||
self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn) | ||||
Drew Gottlieb
|
r25189 | self._fileroots = set(self._files) | ||
Martin Geisler
|
r12165 | |||
Matt Harbison
|
r23685 | def abs(self, f): | ||
return self._matcher.abs(self._path + "/" + f) | ||||
Martin Geisler
|
r12268 | def bad(self, f, msg): | ||
self._matcher.bad(self._path + "/" + f, msg) | ||||
Matt Harbison
|
r23686 | def rel(self, f): | ||
return self._matcher.rel(self._path + "/" + f) | ||||
Matt Harbison
|
r24790 | class icasefsmatcher(match): | ||
"""A matcher for wdir on case insensitive filesystems, which normalizes the | ||||
given patterns to the case in the filesystem. | ||||
""" | ||||
def __init__(self, root, cwd, patterns, include, exclude, default, auditor, | ||||
Matt Harbison
|
r25122 | ctx, listsubrepos=False): | ||
Matt Harbison
|
r24790 | init = super(icasefsmatcher, self).__init__ | ||
self._dsnormalize = ctx.repo().dirstate.normalize | ||||
init(root, cwd, patterns, include, exclude, default, auditor=auditor, | ||||
Matt Harbison
|
r25122 | ctx=ctx, listsubrepos=listsubrepos) | ||
Matt Harbison
|
r24790 | |||
# m.exact(file) must be based off of the actual user input, otherwise | ||||
# inexact case matches are treated as exact, and not noted without -v. | ||||
if self._files: | ||||
Drew Gottlieb
|
r25189 | self._fileroots = set(_roots(self._kp)) | ||
Matt Harbison
|
r24790 | |||
def _normalize(self, patterns, default, root, cwd, auditor): | ||||
self._kp = super(icasefsmatcher, self)._normalize(patterns, default, | ||||
root, cwd, auditor) | ||||
kindpats = [] | ||||
Durham Goode
|
r25213 | for kind, pats, source in self._kp: | ||
Matt Harbison
|
r24790 | if kind not in ('re', 'relre'): # regex can't be normalized | ||
pats = self._dsnormalize(pats) | ||||
Durham Goode
|
r25213 | kindpats.append((kind, pats, source)) | ||
Matt Harbison
|
r24790 | return kindpats | ||
Mads Kiilerich
|
r21111 | def patkind(pattern, default=None): | ||
'''If pattern is 'kind:pat' with a known kind, return kind.''' | ||||
return _patsplit(pattern, default)[0] | ||||
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _patsplit(pattern, default): | ||
"""Split a string into the optional pattern kind prefix and the actual | ||||
pattern.""" | ||||
if ':' in pattern: | ||||
kind, pat = pattern.split(':', 1) | ||||
Steve Borho
|
r13218 | if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', | ||
Durham Goode
|
r25283 | 'listfile', 'listfile0', 'set', 'include', 'subinclude'): | ||
Mads Kiilerich
|
r21111 | return kind, pat | ||
return default, pattern | ||||
Matt Mackall
|
r8570 | |||
Matt Mackall
|
r8582 | def _globre(pat): | ||
Mads Kiilerich
|
r21112 | r'''Convert an extended glob string to a regexp string. | ||
>>> print _globre(r'?') | ||||
. | ||||
>>> print _globre(r'*') | ||||
[^/]* | ||||
>>> print _globre(r'**') | ||||
.* | ||||
Siddharth Agarwal
|
r21815 | >>> print _globre(r'**/a') | ||
(?:.*/)?a | ||||
>>> print _globre(r'a/**/b') | ||||
a\/(?:.*/)?b | ||||
Mads Kiilerich
|
r21112 | >>> print _globre(r'[a*?!^][^b][!c]') | ||
[a*?!^][\^b][^c] | ||||
>>> print _globre(r'{a,b}') | ||||
(?:a|b) | ||||
>>> print _globre(r'.\*\?') | ||||
\.\*\? | ||||
''' | ||||
Matt Mackall
|
r8570 | i, n = 0, len(pat) | ||
res = '' | ||||
group = 0 | ||||
Siddharth Agarwal
|
r21915 | escape = util.re.escape | ||
Matt Mackall
|
r10282 | def peek(): | ||
return i < n and pat[i] | ||||
Matt Mackall
|
r8570 | while i < n: | ||
c = pat[i] | ||||
Matt Mackall
|
r10282 | i += 1 | ||
Matt Mackall
|
r8583 | if c not in '*?[{},\\': | ||
res += escape(c) | ||||
elif c == '*': | ||||
Matt Mackall
|
r8570 | if peek() == '*': | ||
i += 1 | ||||
Siddharth Agarwal
|
r21815 | if peek() == '/': | ||
i += 1 | ||||
res += '(?:.*/)?' | ||||
else: | ||||
res += '.*' | ||||
Matt Mackall
|
r8570 | else: | ||
res += '[^/]*' | ||||
elif c == '?': | ||||
res += '.' | ||||
elif c == '[': | ||||
j = i | ||||
if j < n and pat[j] in '!]': | ||||
j += 1 | ||||
while j < n and pat[j] != ']': | ||||
j += 1 | ||||
if j >= n: | ||||
res += '\\[' | ||||
else: | ||||
stuff = pat[i:j].replace('\\','\\\\') | ||||
i = j + 1 | ||||
if stuff[0] == '!': | ||||
stuff = '^' + stuff[1:] | ||||
elif stuff[0] == '^': | ||||
stuff = '\\' + stuff | ||||
res = '%s[%s]' % (res, stuff) | ||||
elif c == '{': | ||||
group += 1 | ||||
res += '(?:' | ||||
elif c == '}' and group: | ||||
res += ')' | ||||
group -= 1 | ||||
elif c == ',' and group: | ||||
res += '|' | ||||
elif c == '\\': | ||||
p = peek() | ||||
if p: | ||||
i += 1 | ||||
Matt Mackall
|
r8583 | res += escape(p) | ||
Matt Mackall
|
r8570 | else: | ||
Matt Mackall
|
r8583 | res += escape(c) | ||
Matt Mackall
|
r8570 | else: | ||
Matt Mackall
|
r8583 | res += escape(c) | ||
Matt Mackall
|
r8582 | return res | ||
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _regex(kind, pat, globsuffix): | ||
'''Convert a (normalized) pattern of any kind into a regular expression. | ||||
globsuffix is appended to the regexp of globs.''' | ||||
if not pat: | ||||
Matt Mackall
|
r8574 | return '' | ||
if kind == 're': | ||||
Mads Kiilerich
|
r21111 | return pat | ||
if kind == 'path': | ||||
Siddharth Agarwal
|
r21915 | return '^' + util.re.escape(pat) + '(?:/|$)' | ||
Mads Kiilerich
|
r21111 | if kind == 'relglob': | ||
return '(?:|.*/)' + _globre(pat) + globsuffix | ||||
if kind == 'relpath': | ||||
Siddharth Agarwal
|
r21915 | return util.re.escape(pat) + '(?:/|$)' | ||
Mads Kiilerich
|
r21111 | if kind == 'relre': | ||
if pat.startswith('^'): | ||||
return pat | ||||
return '.*' + pat | ||||
return _globre(pat) + globsuffix | ||||
Matt Mackall
|
r8574 | |||
Durham Goode
|
r25238 | def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root): | ||
Mads Kiilerich
|
r21111 | '''Return regexp string and a matcher function for kindpats. | ||
globsuffix is appended to the regexp of globs.''' | ||||
Durham Goode
|
r25239 | matchfuncs = [] | ||
Durham Goode
|
r25283 | subincludes, kindpats = _expandsubinclude(kindpats, root) | ||
if subincludes: | ||||
def matchsubinclude(f): | ||||
for prefix, mf in subincludes: | ||||
if f.startswith(prefix) and mf(f[len(prefix):]): | ||||
return True | ||||
return False | ||||
matchfuncs.append(matchsubinclude) | ||||
Matt Harbison
|
r25122 | fset, kindpats = _expandsets(kindpats, ctx, listsubrepos) | ||
Durham Goode
|
r25239 | if fset: | ||
matchfuncs.append(fset.__contains__) | ||||
Matt Mackall
|
r14675 | |||
Durham Goode
|
r25239 | regex = '' | ||
if kindpats: | ||||
regex, mf = _buildregexmatch(kindpats, globsuffix) | ||||
matchfuncs.append(mf) | ||||
if len(matchfuncs) == 1: | ||||
return regex, matchfuncs[0] | ||||
else: | ||||
return regex, lambda f: any(mf(f) for mf in matchfuncs) | ||||
Matt Mackall
|
r14675 | |||
Mads Kiilerich
|
r21111 | def _buildregexmatch(kindpats, globsuffix): | ||
"""Build a match function from a list of kinds and kindpats, | ||||
return regexp string and a matcher function.""" | ||||
Matt Mackall
|
r8574 | try: | ||
Mads Kiilerich
|
r21111 | regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix) | ||
Durham Goode
|
r25213 | for (k, p, s) in kindpats]) | ||
Mads Kiilerich
|
r21111 | if len(regex) > 20000: | ||
Brodie Rao
|
r16687 | raise OverflowError | ||
Mads Kiilerich
|
r21111 | return regex, _rematcher(regex) | ||
Matt Mackall
|
r8574 | except OverflowError: | ||
# We're using a Python with a tiny regex engine and we | ||||
# made it explode, so we'll divide the pattern list in two | ||||
# until it works | ||||
Mads Kiilerich
|
r21111 | l = len(kindpats) | ||
Matt Mackall
|
r8574 | if l < 2: | ||
raise | ||||
Mads Kiilerich
|
r21111 | regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix) | ||
regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix) | ||||
Yuya Nishihara
|
r21191 | return regex, lambda s: a(s) or b(s) | ||
Matt Mackall
|
r8574 | except re.error: | ||
Durham Goode
|
r25213 | for k, p, s in kindpats: | ||
Matt Mackall
|
r8574 | try: | ||
Mads Kiilerich
|
r21111 | _rematcher('(?:%s)' % _regex(k, p, globsuffix)) | ||
Matt Mackall
|
r8574 | except re.error: | ||
Durham Goode
|
r25213 | if s: | ||
raise util.Abort(_("%s: invalid pattern (%s): %s") % | ||||
(s, k, p)) | ||||
else: | ||||
raise util.Abort(_("invalid pattern (%s): %s") % (k, p)) | ||||
Martin Geisler
|
r12133 | raise util.Abort(_("invalid pattern")) | ||
Matt Mackall
|
r8574 | |||
Mads Kiilerich
|
r21111 | def _roots(kindpats): | ||
Mads Kiilerich
|
r21079 | '''return roots and exact explicitly listed files from patterns | ||
Durham Goode
|
r25213 | >>> _roots([('glob', 'g/*', ''), ('glob', 'g', ''), ('glob', 'g*', '')]) | ||
Mads Kiilerich
|
r21079 | ['g', 'g', '.'] | ||
Durham Goode
|
r25213 | >>> _roots([('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')]) | ||
Mads Kiilerich
|
r21079 | ['r', 'p/p', '.'] | ||
Durham Goode
|
r25213 | >>> _roots([('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')]) | ||
Mads Kiilerich
|
r21079 | ['.', '.', '.'] | ||
''' | ||||
Matt Mackall
|
r8576 | r = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r8584 | if kind == 'glob': # find the non-glob prefix | ||
root = [] | ||||
Mads Kiilerich
|
r21111 | for p in pat.split('/'): | ||
Matt Mackall
|
r8584 | if '[' in p or '{' in p or '*' in p or '?' in p: | ||
break | ||||
root.append(p) | ||||
r.append('/'.join(root) or '.') | ||||
Matt Mackall
|
r8574 | elif kind in ('relpath', 'path'): | ||
Mads Kiilerich
|
r21111 | r.append(pat or '.') | ||
Mads Kiilerich
|
r19107 | else: # relglob, re, relre | ||
Matt Mackall
|
r8576 | r.append('.') | ||
return r | ||||
Mads Kiilerich
|
r21111 | def _anypats(kindpats): | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Patrick Mezard
|
r16182 | if kind in ('glob', 're', 'relglob', 'relre', 'set'): | ||
Matt Mackall
|
r8576 | return True | ||
Durham Goode
|
r25167 | |||
_commentre = None | ||||
def readpatternfile(filepath, warn): | ||||
'''parse a pattern file, returning a list of | ||||
patterns. These patterns should be given to compile() | ||||
Durham Goode
|
r25216 | to be validated and converted into a match function. | ||
trailing white space is dropped. | ||||
the escape character is backslash. | ||||
comments start with #. | ||||
empty lines are skipped. | ||||
lines can be of the following formats: | ||||
syntax: regexp # defaults following lines to non-rooted regexps | ||||
syntax: glob # defaults following lines to non-rooted globs | ||||
re:pattern # non-rooted regular expression | ||||
glob:pattern # non-rooted glob | ||||
pattern # pattern of the current default type''' | ||||
Durham Goode
|
r25215 | syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:', | ||
Durham Goode
|
r25283 | 'include': 'include', 'subinclude': 'subinclude'} | ||
Durham Goode
|
r25167 | syntax = 'relre:' | ||
patterns = [] | ||||
fp = open(filepath) | ||||
for line in fp: | ||||
if "#" in line: | ||||
global _commentre | ||||
if not _commentre: | ||||
_commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*') | ||||
# remove comments prefixed by an even number of escapes | ||||
line = _commentre.sub(r'\1', line) | ||||
# fixup properly escaped comments that survived the above | ||||
line = line.replace("\\#", "#") | ||||
line = line.rstrip() | ||||
if not line: | ||||
continue | ||||
if line.startswith('syntax:'): | ||||
s = line[7:].strip() | ||||
try: | ||||
syntax = syntaxes[s] | ||||
except KeyError: | ||||
Durham Goode
|
r25214 | if warn: | ||
warn(_("%s: ignoring invalid syntax '%s'\n") % | ||||
(filepath, s)) | ||||
Durham Goode
|
r25167 | continue | ||
linesyntax = syntax | ||||
for s, rels in syntaxes.iteritems(): | ||||
if line.startswith(rels): | ||||
linesyntax = rels | ||||
line = line[len(rels):] | ||||
break | ||||
elif line.startswith(s+':'): | ||||
linesyntax = rels | ||||
line = line[len(s) + 1:] | ||||
break | ||||
patterns.append(linesyntax + line) | ||||
fp.close() | ||||
return patterns | ||||