Show More
match.py
784 lines
| 27.5 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Gregory Szorc
|
r25958 | from __future__ import absolute_import | ||
import copy | ||||
import os | ||||
import re | ||||
from .i18n import _ | ||||
from . import ( | ||||
Pierre-Yves David
|
r26587 | error, | ||
Gregory Szorc
|
r25958 | pathutil, | ||
util, | ||||
) | ||||
Matt Mackall
|
r6576 | |||
Drew Gottlieb
|
# Module-level alias for util.propertycache; used below as a decorator
# (see match._dirs).
propertycache = util.propertycache
Mads Kiilerich
|
def _rematcher(regex):
    '''Compile regex through util.re and return a callable that matches a
    string against it.'''
    compiled = util.re.compile(regex)
    try:
        # test_match is slightly faster; it is provided by facebook's re2
        # bindings and absent from other compiled pattern objects
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
Matt Harbison
|
r25122 | def _expandsets(kindpats, ctx, listsubrepos): | ||
Mads Kiilerich
|
r21111 | '''Returns the kindpats list with the 'set' patterns expanded.''' | ||
Matt Mackall
|
r14675 | fset = set() | ||
other = [] | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r14675 | if kind == 'set': | ||
if not ctx: | ||||
liscju
|
r29389 | raise error.Abort(_("fileset expression with no context")) | ||
Mads Kiilerich
|
r21111 | s = ctx.getfileset(pat) | ||
Matt Mackall
|
r14675 | fset.update(s) | ||
Matt Harbison
|
r25122 | |||
if listsubrepos: | ||||
for subpath in ctx.substate: | ||||
s = ctx.sub(subpath).getfileset(pat) | ||||
fset.update(subpath + '/' + f for f in s) | ||||
Matt Mackall
|
r14675 | continue | ||
Durham Goode
|
r25213 | other.append((kind, pat, source)) | ||
Matt Mackall
|
r14675 | return fset, other | ||
Durham Goode
|
r25283 | def _expandsubinclude(kindpats, root): | ||
'''Returns the list of subinclude matchers and the kindpats without the | ||||
subincludes in it.''' | ||||
relmatchers = [] | ||||
other = [] | ||||
for kind, pat, source in kindpats: | ||||
if kind == 'subinclude': | ||||
Matt Harbison
|
r25301 | sourceroot = pathutil.dirname(util.normpath(source)) | ||
Durham Goode
|
r25283 | pat = util.pconvert(pat) | ||
path = pathutil.join(sourceroot, pat) | ||||
newroot = pathutil.dirname(path) | ||||
relmatcher = match(newroot, '', [], ['include:%s' % path]) | ||||
prefix = pathutil.canonpath(root, root, newroot) | ||||
if prefix: | ||||
prefix += '/' | ||||
relmatchers.append((prefix, relmatcher)) | ||||
else: | ||||
other.append((kind, pat, source)) | ||||
return relmatchers, other | ||||
Martin von Zweigbergk
|
r24447 | def _kindpatsalwaysmatch(kindpats): | ||
""""Checks whether the kindspats match everything, as e.g. | ||||
'relpath:.' does. | ||||
""" | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Martin von Zweigbergk
|
r24447 | if pat != '' or kind not in ['relpath', 'glob']: | ||
return False | ||||
return True | ||||
Matt Mackall
|
class match(object):
    def __init__(self, root, cwd, patterns, include=[], exclude=[],
                 default='glob', exact=False, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - passed through to pathutil.canonpath() when normalizing
                  'glob' and 'relpath' patterns
        ctx - context used to evaluate 'set:' patterns
        listsubrepos - if true, 'set:' patterns are also evaluated in the
                       subrepos of ctx
        warn - optional function used for printing warnings
        badfn - optional bad() callback for this matcher instead of the default

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root, which is matched
                        recursively
        'rootfilesin:<path>' - a path relative to repository root, which is
                        matched non-recursively (will not match subdirectories)
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        'include:<path>' - a file of patterns to read and include
        'subinclude:<path>' - a file of patterns to match against files under
                              the same directory
        '<something>' - a pattern of the specified default type
        """

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._always = False
        self._pathrestricted = bool(include or exclude or patterns)
        self._warn = warn

        # roots are directories which are recursively included/excluded.
        self._includeroots = set()
        self._excluderoots = set()
        # dirs are directories which are non-recursively included.
        self._includedirs = set(['.'])

        # an explicit badfn shadows the bad() method on this instance only
        if badfn is not None:
            self.bad = badfn

        # every function collected here must accept a file for it to match
        matchfns = []
        if include:
            # include/exclude patterns always default to 'glob' and use a
            # suffix that also matches everything below a directory
            kindpats = self._normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            roots, dirs = _rootsanddirs(kindpats)
            self._includeroots.update(roots)
            self._includedirs.update(dirs)
            matchfns.append(im)
        if exclude:
            kindpats = self._normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)',
                                              listsubrepos, root)
            if not _anypats(kindpats):
                # Only consider recursive excludes as such - if a non-recursive
                # exclude is used, we must still recurse into the excluded
                # directory, at least to find subdirectories. In such a case,
                # the regex still won't match the non-recursively-excluded
                # files.
                self._excluderoots.update(_roots(kindpats))
            matchfns.append(lambda f: not em(f))
        if exact:
            # patterns are used verbatim as file names; a list is stored
            # without copying
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            matchfns.append(self.exact)
        elif patterns:
            kindpats = self._normalize(patterns, default, root, cwd, auditor)
            # patterns that match everything add no restriction at all
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = self._anypats or _anypats(kindpats)
                self.patternspat, pm = _buildmatch(ctx, kindpats, '$',
                                                   listsubrepos, root)
                matchfns.append(pm)

        # combine the collected functions: none means "match everything",
        # one is used directly, several are and-ed together
        if not matchfns:
            m = util.always
            self._always = True
        elif len(matchfns) == 1:
            m = matchfns[0]
        else:
            def m(f):
                for matchfn in matchfns:
                    if not matchfn(f):
                        return False
                return True

        self.matchfn = m
        self._fileroots = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)
    def __iter__(self):
        '''Iterate over the entries of self._files.'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        # note: also falls back to abs(f) if rel(f) returns a false value
        return (self._pathrestricted and self.rel(f)) or self.abs(f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _dirs(self):
        '''The entries of util.dirs(self._fileroots) plus '.', as a set
        (presumably the parent directories of the file roots - confirm
        against util.dirs).'''
        return set(util.dirs(self._fileroots)) | set(['.'])

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        # a prefix matcher rooted at this directory takes everything below it
        if self.prefix() and dir in self._fileroots:
            return 'all'
        if dir in self._excluderoots:
            return False
        # when includes restrict the walk, dir must be an include root, an
        # exact include dir, or live below an include root
        if ((self._includeroots or self._includedirs != set(['.'])) and
            '.' not in self._includeroots and
            dir not in self._includeroots and
            dir not in self._includedirs and
            not any(parent in self._includeroots
                    for parent in util.finddirs(dir))):
            return False
        # finally check dir against the primary patterns' roots
        return (not self._fileroots or
                '.' in self._fileroots or
                dir in self._fileroots or
                dir in self._dirs or
                any(parentdir in self._fileroots
                    for parentdir in util.finddirs(dir)))

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileroots

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always

    def ispartial(self):
        '''True if the matcher won't always match.

        Although it's just the inverse of _always in this implementation,
        an extension such as narrowhg might make it return something
        slightly different.'''
        return not self._always

    def isexact(self):
        '''True if the combined match function is exactly self.exact.'''
        return self.matchfn == self.exact

    def prefix(self):
        '''True if the matcher is neither always(), isexact() nor anypats().'''
        return not self.always() and not self.isexact() and not self.anypats()

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Convert 'kind:pat' from the patterns list to tuples with kind and
        normalized and rooted patterns and with listfiles expanded.

        Returns a list of (kind, pat, source) tuples; source is '' for
        patterns given directly and names the list/include file otherwise.
        '''
        kindpats = []
        for kind, pat in [_patsplit(p, default) for p in patterns]:
            if kind in ('glob', 'relpath'):
                pat = pathutil.canonpath(root, cwd, pat, auditor)
            elif kind in ('relglob', 'path', 'rootfilesin'):
                pat = util.normpath(pat)
            elif kind in ('listfile', 'listfile0'):
                # pat names a file holding one pattern per line
                # (NUL-separated for listfile0); empty entries are dropped
                try:
                    files = util.readfile(pat)
                    if kind == 'listfile0':
                        files = files.split('\0')
                    else:
                        files = files.splitlines()
                    files = [f for f in files if f]
                except EnvironmentError:
                    raise error.Abort(_("unable to read file list (%s)") % pat)
                # the list file itself is recorded as the source
                for k, p, source in self._normalize(files, default, root, cwd,
                                                    auditor):
                    kindpats.append((k, p, pat))
                continue
            elif kind == 'include':
                try:
                    fullpath = os.path.join(root, util.localpath(pat))
                    includepats = readpatternfile(fullpath, self._warn)
                    # keep the innermost source when includes are nested
                    for k, p, source in self._normalize(includepats, default,
                                                        root, cwd, auditor):
                        kindpats.append((k, p, source or pat))
                except error.Abort as inst:
                    # prefix the message with the include file's name
                    raise error.Abort('%s: %s' % (pat, inst[0]))
                except IOError as inst:
                    # an unreadable include file is skipped, not fatal
                    if self._warn:
                        self._warn(_("skipping unreadable pattern file "
                                     "'%s': %s\n") % (pat, inst.strerror))
                continue
            # else: re or relre - which cannot be normalized
            kindpats.append((kind, pat, ''))
        return kindpats
Matt Harbison
|
def exact(root, cwd, files, badfn=None):
    '''Return a matcher that treats files as exact file names.'''
    return match(root=root, cwd=cwd, patterns=files, exact=True, badfn=badfn)
Matt Mackall
|
r8585 | |||
Martin von Zweigbergk
|
def always(root, cwd):
    '''Return a matcher built without any patterns.'''
    return match(root=root, cwd=cwd, patterns=[])
Matt Mackall
|
r8585 | |||
Matt Harbison
|
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad attribute is
    badfn.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
Martin von Zweigbergk
|
class subdirmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        # The superclass constructor is not called; the state it would set
        # is copied or derived from the wrapped matcher instead.
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._path = path
        self._matcher = matcher
        self._always = matcher._always
        self._pathrestricted = matcher._pathrestricted

        # keep only the files below path, with the "path/" prefix removed
        self._files = [f[len(path) + 1:] for f in matcher._files
                       if f.startswith(path + "/")]

        # If the parent repo had a path to this subrepo and no patterns are
        # specified, this submatcher always matches.
        if not self._always and not matcher._anypats:
            self._always = any(f == path for f in matcher._files)

        self._anypats = matcher._anypats
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
        def visitdir(dir):
            # '.' stands for the subdirectory itself
            if dir == '.':
                return matcher.visitdir(self._path)
            return matcher.visitdir(self._path + "/" + dir)
        self.visitdir = visitdir
        self._fileroots = set(self._files)

    def abs(self, f):
        '''Delegate to the wrapped matcher's abs() with the subdirectory
        path prepended to f.'''
        return self._matcher.abs(self._path + "/" + f)

    def bad(self, f, msg):
        '''Forward to the wrapped matcher's bad() with the subdirectory path
        prepended to f.'''
        self._matcher.bad(self._path + "/" + f, msg)

    def rel(self, f):
        '''Delegate to the wrapped matcher's rel() with the subdirectory
        path prepended to f.'''
        return self._matcher.rel(self._path + "/" + f)
Matt Harbison
|
class icasefsmatcher(match):
    """A matcher for wdir on case insensitive filesystems, which normalizes the
    given patterns to the case in the filesystem.
    """

    def __init__(self, root, cwd, patterns, include, exclude, default, auditor,
                 ctx, listsubrepos=False, badfn=None):
        # these must be set before the superclass constructor runs, because
        # it calls back into the overridden _normalize()
        self._dirstate = ctx.repo().dirstate
        self._dsnormalize = self._dirstate.normalize

        super(icasefsmatcher, self).__init__(
            root, cwd, patterns, include, exclude, default, auditor=auditor,
            ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

        # m.exact(file) must be based off of the actual user input, otherwise
        # inexact case matches are treated as exact, and not noted without -v.
        if self._files:
            roots, dirs = _rootsanddirs(self._kp)
            self._fileroots = set(roots) | set(dirs)

    def _normalize(self, patterns, default, root, cwd, auditor):
        '''Normalize patterns like the superclass does, then fold the case of
        every non-regex pattern through the dirstate.

        The superclass result (before case folding) is remembered in
        self._kp.
        '''
        basenormalize = super(icasefsmatcher, self)._normalize
        self._kp = basenormalize(patterns, default, root, cwd, auditor)

        folded = []
        for kind, pat, source in self._kp:
            if kind in ('re', 'relre'):
                # regex can't be normalized
                folded.append((kind, pat, source))
                continue

            normalized = self._dsnormalize(pat)
            # Preserve the original to handle a case only rename.
            if normalized != pat and pat in self._dirstate:
                folded.append((kind, pat, source))
            folded.append((kind, normalized, source))
        return folded
Mads Kiilerich
|
def patkind(pattern, default=None):
    '''Return the kind of pattern when it is 'kind:pat' with a known kind,
    otherwise return default.'''
    kind, pat = _patsplit(pattern, default)
    return kind
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _patsplit(pattern, default): | ||
"""Split a string into the optional pattern kind prefix and the actual | ||||
pattern.""" | ||||
if ':' in pattern: | ||||
kind, pat = pattern.split(':', 1) | ||||
Steve Borho
|
r13218 | if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', | ||
Rodrigo Damazio Bovendorp
|
r31012 | 'listfile', 'listfile0', 'set', 'include', 'subinclude', | ||
'rootfilesin'): | ||||
Mads Kiilerich
|
r21111 | return kind, pat | ||
return default, pattern | ||||
Matt Mackall
|
r8570 | |||
Matt Mackall
|
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    i, n = 0, len(pat)
    res = ''
    group = 0  # nesting depth of currently open '{' groups
    escape = util.re.escape
    def peek():
        # next unread character, or False at the end of the pattern
        return i < n and pat[i]
    while i < n:
        c = pat[i]
        i += 1
        if c not in '*?[{},\\':
            # not a glob metacharacter: match it literally
            res += escape(c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' matches zero or more leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' matches anything, including '/'
                    res += '.*'
            else:
                # a single '*' does not cross directory separators
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']'; a leading '!' or ']' is part of the
            # class and does not terminate it
            j = i
            if j < n and pat[j] in '!]':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # glob negation becomes regexp negation
                    stuff = '^' + stuff[1:]
                elif stuff[0] == '^':
                    # a leading '^' is literal in a glob, so escape it
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{' group
            res += '|'
        elif c == '\\':
            # backslash escapes the next character; a trailing backslash
            # is matched literally
            p = peek()
            if p:
                i += 1
                res += escape(p)
            else:
                res += escape(c)
        else:
            # '}' or ',' outside of any group
            res += escape(c)
    return res
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _regex(kind, pat, globsuffix): | ||
'''Convert a (normalized) pattern of any kind into a regular expression. | ||||
globsuffix is appended to the regexp of globs.''' | ||||
if not pat: | ||||
Matt Mackall
|
r8574 | return '' | ||
if kind == 're': | ||||
Mads Kiilerich
|
r21111 | return pat | ||
if kind == 'path': | ||||
Matt Harbison
|
r25636 | if pat == '.': | ||
return '' | ||||
Siddharth Agarwal
|
r21915 | return '^' + util.re.escape(pat) + '(?:/|$)' | ||
Rodrigo Damazio Bovendorp
|
r31012 | if kind == 'rootfilesin': | ||
if pat == '.': | ||||
escaped = '' | ||||
else: | ||||
# Pattern is a directory name. | ||||
escaped = util.re.escape(pat) + '/' | ||||
# Anything after the pattern must be a non-directory. | ||||
return '^' + escaped + '[^/]+$' | ||||
Mads Kiilerich
|
r21111 | if kind == 'relglob': | ||
return '(?:|.*/)' + _globre(pat) + globsuffix | ||||
if kind == 'relpath': | ||||
Siddharth Agarwal
|
r21915 | return util.re.escape(pat) + '(?:/|$)' | ||
Mads Kiilerich
|
r21111 | if kind == 'relre': | ||
if pat.startswith('^'): | ||||
return pat | ||||
return '.*' + pat | ||||
return _globre(pat) + globsuffix | ||||
Matt Mackall
|
r8574 | |||
Durham Goode
|
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return a (regexp string, matcher function) pair for kindpats.

    'subinclude' and 'set' patterns are handled by dedicated match
    functions; all other patterns are compiled into one regexp, with
    globsuffix appended to the regexp of globs. The returned regexp string
    is empty when no pattern ended up in the regexp.'''
    candidates = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        def matchsubinclude(f):
            # a sub-matcher only sees the part of f below its prefix
            return any(f.startswith(prefix) and submatch(f[len(prefix):])
                       for prefix, submatch in subincludes)
        candidates.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        candidates.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        candidates.append(regexmatch)

    if len(candidates) == 1:
        return regex, candidates[0]
    # a file matches as soon as any of the match functions accepts it
    return regex, lambda f: any(fn(f) for fn in candidates)
Matt Mackall
|
r14675 | |||
Mads Kiilerich
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort when a pattern does not compile, and re-raises
    OverflowError when a single pattern is still too large."""
    try:
        # one alternation covering every pattern
        regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
                                     for (k, p, s) in kindpats])
        # treat an overlong regexp like an engine overflow, so that it is
        # split by the handler below
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        l = len(kindpats)
        if l < 2:
            raise
        regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
        regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
        # the full (unsplit) regexp string is still what gets returned
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one at a time to report the offending one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # each pattern compiled alone, so no single culprit can be named
        raise error.Abort(_("invalid pattern"))
Matt Mackall
|
r8574 | |||
Rodrigo Damazio Bovendorp
|
r31013 | def _patternrootsanddirs(kindpats): | ||
'''Returns roots and directories corresponding to each pattern. | ||||
Mads Kiilerich
|
r21079 | |||
Rodrigo Damazio Bovendorp
|
r31013 | This calculates the roots and directories exactly matching the patterns and | ||
returns a tuple of (roots, dirs) for each. It does not return other | ||||
directories which may also need to be considered, like the parent | ||||
directories. | ||||
Mads Kiilerich
|
r21079 | ''' | ||
Matt Mackall
|
r8576 | r = [] | ||
Rodrigo Damazio Bovendorp
|
r31013 | d = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r8584 | if kind == 'glob': # find the non-glob prefix | ||
root = [] | ||||
Mads Kiilerich
|
r21111 | for p in pat.split('/'): | ||
Matt Mackall
|
r8584 | if '[' in p or '{' in p or '*' in p or '?' in p: | ||
break | ||||
root.append(p) | ||||
r.append('/'.join(root) or '.') | ||||
Rodrigo Damazio Bovendorp
|
r31013 | elif kind in ('relpath', 'path'): | ||
Mads Kiilerich
|
r21111 | r.append(pat or '.') | ||
Rodrigo Damazio Bovendorp
|
r31013 | elif kind in ('rootfilesin',): | ||
d.append(pat or '.') | ||||
Mads Kiilerich
|
r19107 | else: # relglob, re, relre | ||
Matt Mackall
|
r8576 | r.append('.') | ||
Rodrigo Damazio Bovendorp
|
r31013 | return r, d | ||
def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    return _patternrootsanddirs(kindpats)[0]
def _rootsanddirs(kindpats):
    '''Returns roots and exact directories from patterns.

    roots are directories to match recursively, whereas exact directories should
    be matched non-recursively. The returned (roots, dirs) tuple will also
    include directories that need to be implicitly considered as either, such as
    parent directories.

    >>> _rootsanddirs(\
        [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g'])
    >>> _rootsanddirs(\
        [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g'])
    >>> _rootsanddirs(\
        [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p'])
    >>> _rootsanddirs(\
        [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], [])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents must be scanned to reach either the roots or the other
    # exact directories, so they are appended as non-recursive/exact
    # directories: first those of the exact directories, then those of the
    # roots.
    exactdirs.extend(util.dirs(exactdirs))
    exactdirs.extend(util.dirs(roots))
    return roots, exactdirs
Matt Mackall
|
r8576 | |||
Rodrigo Damazio Bovendorp
|
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    # 'rootfilesin' can only name directories, never files, so those
    # patterns are left out before computing the roots.
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)
Mads Kiilerich
|
r21111 | def _anypats(kindpats): | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Rodrigo Damazio Bovendorp
|
r31012 | if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'): | ||
Matt Mackall
|
r8576 | return True | ||
Durham Goode
|
r25167 | |||
# Compiled lazily by readpatternfile() the first time a line contains '#'.
_commentre = None

def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    arguments:
    filepath - path of the pattern file, handed to open()
    warn - optional function called with a message when an unknown
           'syntax:' line is ignored
    sourceinfo - if set, returns a list of tuples:
                 (pattern, lineno, originalline), where originalline is the
                 line after comments, trailing white space and any syntax
                 prefix were removed. This is useful to debug ignore patterns.

    Errors raised by open() propagate to the caller. The file is closed even
    when reading or parsing fails.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block guarantees the handle is released on every exit path
    with open(filepath) as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(r'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # switch the default syntax for the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # a per-line prefix overrides the current default syntax
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns