match.py
1001 lines
| 33.5 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Gregory Szorc
|
r25958 | from __future__ import absolute_import | ||
import copy | ||||
import os | ||||
import re | ||||
from .i18n import _ | ||||
from . import ( | ||||
Pierre-Yves David
|
r26587 | error, | ||
Gregory Szorc
|
r25958 | pathutil, | ||
util, | ||||
) | ||||
Matt Mackall
|
r6576 | |||
Drew Gottlieb
|
r24636 | propertycache = util.propertycache | ||
Mads Kiilerich
|
def _rematcher(regex):
    '''Compile regex with util.re and return a callable that matches a
    string against it.

    The compiled object's test_match is returned when it exists; otherwise
    its regular match method is returned.
    '''
    compiled = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matchfn = compiled.test_match
    except AttributeError:
        matchfn = compiled.match
    return matchfn
Matt Harbison
|
r25122 | def _expandsets(kindpats, ctx, listsubrepos): | ||
Mads Kiilerich
|
r21111 | '''Returns the kindpats list with the 'set' patterns expanded.''' | ||
Matt Mackall
|
r14675 | fset = set() | ||
other = [] | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r14675 | if kind == 'set': | ||
if not ctx: | ||||
Martin von Zweigbergk
|
r32444 | raise error.ProgrammingError("fileset expression with no " | ||
"context") | ||||
Mads Kiilerich
|
r21111 | s = ctx.getfileset(pat) | ||
Matt Mackall
|
r14675 | fset.update(s) | ||
Matt Harbison
|
r25122 | |||
if listsubrepos: | ||||
for subpath in ctx.substate: | ||||
s = ctx.sub(subpath).getfileset(pat) | ||||
fset.update(subpath + '/' + f for f in s) | ||||
Matt Mackall
|
r14675 | continue | ||
Durham Goode
|
r25213 | other.append((kind, pat, source)) | ||
Matt Mackall
|
r14675 | return fset, other | ||
Durham Goode
|
r25283 | def _expandsubinclude(kindpats, root): | ||
Durham Goode
|
r32132 | '''Returns the list of subinclude matcher args and the kindpats without the | ||
Durham Goode
|
r25283 | subincludes in it.''' | ||
relmatchers = [] | ||||
other = [] | ||||
for kind, pat, source in kindpats: | ||||
if kind == 'subinclude': | ||||
Matt Harbison
|
r25301 | sourceroot = pathutil.dirname(util.normpath(source)) | ||
Durham Goode
|
r25283 | pat = util.pconvert(pat) | ||
path = pathutil.join(sourceroot, pat) | ||||
newroot = pathutil.dirname(path) | ||||
Durham Goode
|
r32132 | matcherargs = (newroot, '', [], ['include:%s' % path]) | ||
Durham Goode
|
r25283 | |||
prefix = pathutil.canonpath(root, root, newroot) | ||||
if prefix: | ||||
prefix += '/' | ||||
Durham Goode
|
r32132 | relmatchers.append((prefix, matcherargs)) | ||
Durham Goode
|
r25283 | else: | ||
other.append((kind, pat, source)) | ||||
return relmatchers, other | ||||
Martin von Zweigbergk
|
r24447 | def _kindpatsalwaysmatch(kindpats): | ||
""""Checks whether the kindspats match everything, as e.g. | ||||
'relpath:.' does. | ||||
""" | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Martin von Zweigbergk
|
r24447 | if pat != '' or kind not in ['relpath', 'glob']: | ||
return False | ||||
return True | ||||
Martin von Zweigbergk
|
def match(root, cwd, patterns, include=None, exclude=None, default='glob',
          exact=False, auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    exact - patterns are actually filenames (include/exclude still apply)
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    Raises error.ProgrammingError when both exact and icasefs are requested.
    """
    if icasefs:
        if exact:
            raise error.ProgrammingError("a case-insensitive exact matcher "
                                         "doesn't make sense")
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            result = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind in ('re', 'relre'):
                    # regex can't be normalized
                    result.append((kind, pat, source))
                    continue
                folded = foldcase(pat)
                # Preserve the original to handle a case only rename.
                if pat != folded and pat in dirstate:
                    result.append((kind, pat, source))
                result.append((kind, folded, source))
            return result
    else:
        normalize = _donormalize

    if exact:
        m = exactmatcher(root, cwd, patterns, badfn)
    elif patterns:
        m = patternmatcher(root, cwd, normalize, patterns, default=default,
                           auditor=auditor, ctx=ctx,
                           listsubrepos=listsubrepos, warn=warn, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably adding a
        # "nevermatcher").
        m = alwaysmatcher(root, cwd, badfn)

    # -I and -X matchers share everything except their pattern list
    incargs = dict(auditor=auditor, ctx=ctx, listsubrepos=listsubrepos,
                   warn=warn, badfn=None)
    if include:
        m = intersectmatchers(m, includematcher(root, cwd, normalize,
                                                include, **incargs))
    if exclude:
        m = differencematcher(m, includematcher(root, cwd, normalize,
                                                exclude, **incargs))
    return m
Martin von Zweigbergk
|
r32394 | |||
def exact(root, cwd, files, badfn=None):
    '''Return an exactmatcher for the given list of file names.'''
    matcher = exactmatcher(root, cwd, files, badfn=badfn)
    return matcher
Martin von Zweigbergk
|
r32394 | |||
def always(root, cwd):
    '''Return an alwaysmatcher for the given root and cwd.'''
    matcher = alwaysmatcher(root, cwd)
    return matcher
Martin von Zweigbergk
|
r32394 | |||
def badmatch(match, badfn):
    """Return a shallow copy of the given matcher whose bad attribute is
    replaced by badfn. The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
Martin von Zweigbergk
|
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. source is '' for patterns
    given directly, the list file name for patterns read from a
    'listfile'/'listfile0', and the nested source or the include file name
    for patterns read from an 'include' file.

    Raises error.Abort when a list file cannot be read or when reading an
    include file aborts. An include file that raises IOError is skipped,
    with a message passed to warn (if warn is set).
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # every pattern read from the list file is attributed to it
            for k, p, _source in _donormalize(files, default, root, cwd,
                                              auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions are not subscriptable on Python 3; args[0] is
                # what inst[0] evaluates to on Python 2.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, inst.strerror))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
Martin von Zweigbergk
|
class basematcher(object):
    '''Base class for matchers.

    By default it matches nothing: matchfn() and visitdir() return False
    and files() is empty. Subclasses override what they need.
    '''

    def __init__(self, root, cwd, badfn=None, relativeuipath=True):
        self._root = root
        self._cwd = cwd
        self._relativeuipath = relativeuipath
        # only shadow the bad() method when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def abs(self, f):
        '''Convert a repo path back to path that is relative to the root of the
        matcher.'''
        return f

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def uipath(self, f):
        '''Convert repo path to a display path.  If patterns or -I/-X were used
        to create this matcher, the display path will be relative to cwd.
        Otherwise it is relative to the root of the repo.'''
        if self._relativeuipath:
            relpath = self.rel(f)
            # an empty relative path falls through to abs()
            if relpath:
                return relpath
        return self.abs(f)

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.

        This function's behavior is undefined if it has returned False for
        one of the dir's parent directories.
        '''
        return False

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return False

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return False

    def isexact(self):
        return False

    def prefix(self):
        '''True when the matcher is none of always, exact or pattern-based.'''
        return not (self.always() or self.isexact() or self.anypats())
Martin von Zweigbergk
|
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, root, cwd, badfn=None):
        # display paths of an always-matcher are not made relative to cwd
        super(alwaysmatcher, self).__init__(root, cwd, badfn=badfn,
                                            relativeuipath=False)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory and all of its subdirectories are of interest
        return 'all'

    def __repr__(self):
        return '<alwaysmatcher>'
Martin von Zweigbergk
|
class patternmatcher(basematcher):
    '''Matches files against a list of patterns.

    The patterns are normalized with the supplied normalize function. When
    there are no patterns, or they trivially match everything, the matcher
    behaves as an always-matcher.
    '''

    def __init__(self, root, cwd, normalize, patterns, default='glob',
                 auditor=None, ctx=None, listsubrepos=False, warn=None,
                 badfn=None):
        super(patternmatcher, self).__init__(root, cwd, badfn,
                                             relativeuipath=bool(patterns))

        self._anypats = False
        self._always = False
        self.patternspat = None

        matchfn = None
        if patterns:
            kindpats = normalize(patterns, default, root, cwd, auditor, warn)
            if not _kindpatsalwaysmatch(kindpats):
                self._files = _explicitfiles(kindpats)
                self._anypats = _anypats(kindpats)
                self.patternspat, matchfn = _buildmatch(ctx, kindpats, '$',
                                                        listsubrepos, root)

        if matchfn is None:
            # nothing restricts the match: accept every file
            self._always = True
            matchfn = util.always

        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # parent directories of the explicit files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        if self.always():
            return 'all'
        explicit = self._fileset
        if self.prefix() and dir in explicit:
            return 'all'
        if '.' in explicit or dir in explicit or dir in self._dirs:
            return True
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def anypats(self):
        return self._anypats

    def always(self):
        return self._always

    def __repr__(self):
        return ('<patternmatcher patterns=%r>' % self.patternspat)
Martin von Zweigbergk
|
r32501 | |||
class includematcher(basematcher):
    '''Matches files against a list of include patterns (default kind
    'glob'), where a pattern naming a directory also matches what is
    below it.
    '''

    def __init__(self, root, cwd, normalize, include, auditor=None, ctx=None,
                 listsubrepos=False, warn=None, badfn=None):
        super(includematcher, self).__init__(root, cwd, badfn)

        kindpats = normalize(include, 'glob', root, cwd, auditor, warn)
        self.includepat, self.matchfn = _buildmatch(ctx, kindpats, '(?:/|$)',
                                                    listsubrepos, root)
        self._anypats = _anypats(kindpats)

        roots, dirs = _rootsanddirs(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)

    def visitdir(self, dir):
        roots = self._roots
        if not self._anypats and dir in roots:
            # The condition above is essentially self.prefix() for includes
            return 'all'
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    def anypats(self):
        return True

    def __repr__(self):
        return ('<includematcher includes=%r>' % self.includepat)
Martin von Zweigbergk
|
r32406 | |||
Martin von Zweigbergk
|
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, root, cwd, files, badfn=None):
        super(exactmatcher, self).__init__(root, cwd, badfn)
        # a list is stored as given; any other iterable is copied into one
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # parent directories of the listed files, plus the root
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def isexact(self):
        return True

    def __repr__(self):
        return ('<exactmatcher files=%r>' % self._files)
Martin von Zweigbergk
|
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not. Well, almost... If the user provides a pattern like "-X foo foo",
    Mercurial actually does match "foo" against that. That's because exact
    matches are treated specially. So, since this differencematcher is used for
    excludes, it needs to special-case exact matching.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    TODO: If we want to keep the behavior described above for exact matches, we
    should consider instead treating the above case something like this:
    union(exact(foo), difference(pattern(foo), include(foo)))
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        # callbacks always come from the first matcher
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        included = self._m1(f)
        return included and (not self._m2(f) or self._m1.exact(f))

    @propertycache
    def _files(self):
        if not self.isexact():
            # If m1 is not an exact matcher, we can't easily figure out the
            # set of files, because its files() are not always files. For
            # example, if m1 is "path:dir" and m2 is "rootfilesin:.", we
            # don't want to remove "dir" from the set even though it would
            # match m2, because the "dir" in m1 may not be a file.
            return self._m1.files()
        return [f for f in self._m1.files() if self(f)]

    def visitdir(self, dir):
        if self._m2.visitdir(dir) == 'all':
            # There's a bug here: If m1 matches file 'dir/file' and m2 excludes
            # 'dir' (recursively), we should still visit 'dir' due to the
            # exception we have for exact matches.
            return False
        return bool(self._m1.visitdir(dir))

    def isexact(self):
        return self._m1.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
Martin von Zweigbergk
|
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (root, cwd, bad, explicitdir,
    traversedir) are ignored.

    When either argument is None the other one is returned. When one of the
    matchers matches everything, a copy of the other one is returned instead
    of a new intersectionmatcher.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        m = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir', 'abs', 'rel'):
            setattr(m, attr, getattr(m1, attr))
        m._relativeuipath |= m1._relativeuipath
        return m

    if m2.always():
        m = copy.copy(m1)
        m._relativeuipath |= m2._relativeuipath
        return m

    return intersectionmatcher(m1, m2)
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    Root, cwd and the bad/explicitdir/traversedir callbacks are taken from
    the first matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__(m1._root, m1._cwd)
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want to
            # remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # filter the exact matcher's files through the other matcher
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(visit1 and self._m2.visitdir(dir))

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    def anypats(self):
        return self._m1.anypats() or self._m2.anypats()

    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
Martin von Zweigbergk
|
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = subdirmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> util.pconvert(m2.rel('b.txt'))
    'sub/b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    >>> m2.abs('c.txt')
    'sub/c.txt'
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__(matcher._root, matcher._cwd)
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # keep only the files below path, with the "path/" prefix removed
        dirprefix = path + "/"
        self._files = [f[len(dirprefix):] for f in matcher._files
                       if f.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        self._matcher.bad(self._path + "/" + f, msg)

    def abs(self, f):
        return self._matcher.abs(self._path + "/" + f)

    def rel(self, f):
        return self._matcher.rel(self._path + "/" + f)

    def uipath(self, f):
        return self._matcher.uipath(self._path + "/" + f)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        # '.' denotes the subdirectory itself
        if dir == '.':
            parentdir = self._path
        else:
            parentdir = self._path + "/" + dir
        return self._matcher.visitdir(parentdir)

    def always(self):
        return self._always

    def anypats(self):
        return self._matcher.anypats()

    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
Mads Kiilerich
|
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return default.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _patsplit(pattern, default): | ||
"""Split a string into the optional pattern kind prefix and the actual | ||||
pattern.""" | ||||
if ':' in pattern: | ||||
kind, pat = pattern.split(':', 1) | ||||
Steve Borho
|
r13218 | if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', | ||
Rodrigo Damazio Bovendorp
|
r31012 | 'listfile', 'listfile0', 'set', 'include', 'subinclude', | ||
'rootfilesin'): | ||||
Mads Kiilerich
|
r21111 | return kind, pat | ||
return default, pattern | ||||
Matt Mackall
|
r8570 | |||
Matt Mackall
|
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> print _globre(r'?')
    .
    >>> print _globre(r'*')
    [^/]*
    >>> print _globre(r'**')
    .*
    >>> print _globre(r'**/a')
    (?:.*/)?a
    >>> print _globre(r'a/**/b')
    a\/(?:.*/)?b
    >>> print _globre(r'[a*?!^][^b][!c]')
    [a*?!^][\^b][^c]
    >>> print _globre(r'{a,b}')
    (?:a|b)
    >>> print _globre(r'.\*\?')
    \.\*\?
    '''
    escape = util.re.escape
    end = len(pat)
    pos = 0
    depth = 0  # number of currently open {...} groups
    out = []
    while pos < end:
        ch = pat[pos:pos + 1]
        pos += 1
        if ch == '*':
            if pat[pos:pos + 1] != '*':
                # a single star never crosses a directory separator
                out.append('[^/]*')
                continue
            pos += 1
            if pat[pos:pos + 1] == '/':
                # '**/' may also match no directory at all
                pos += 1
                out.append('(?:.*/)?')
            else:
                out.append('.*')
        elif ch == '?':
            out.append('.')
        elif ch == '[':
            # locate the closing bracket; a leading '!' or ']' is part of
            # the class and cannot close it
            close = pos
            if close < end and pat[close:close + 1] in '!]':
                close += 1
            while close < end and pat[close:close + 1] != ']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' literally
                out.append('\\[')
                continue
            stuff = pat[pos:close].replace('\\', '\\\\')
            pos = close + 1
            if stuff[0:1] == '!':
                stuff = '^' + stuff[1:]
            elif stuff[0:1] == '^':
                stuff = '\\' + stuff
            out.append('[%s]' % stuff)
        elif ch == '{':
            depth += 1
            out.append('(?:')
        elif ch == '}' and depth:
            out.append(')')
            depth -= 1
        elif ch == ',' and depth:
            out.append('|')
        elif ch == '\\':
            # a backslash escapes the next character, or itself at the end
            following = pat[pos:pos + 1]
            if following:
                pos += 1
                out.append(escape(following))
            else:
                out.append(escape(ch))
        else:
            out.append(escape(ch))
    return ''.join(out)
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _regex(kind, pat, globsuffix): | ||
'''Convert a (normalized) pattern of any kind into a regular expression. | ||||
globsuffix is appended to the regexp of globs.''' | ||||
if not pat: | ||||
Matt Mackall
|
r8574 | return '' | ||
if kind == 're': | ||||
Mads Kiilerich
|
r21111 | return pat | ||
if kind == 'path': | ||||
Matt Harbison
|
r25636 | if pat == '.': | ||
return '' | ||||
Siddharth Agarwal
|
r21915 | return '^' + util.re.escape(pat) + '(?:/|$)' | ||
Rodrigo Damazio Bovendorp
|
r31012 | if kind == 'rootfilesin': | ||
if pat == '.': | ||||
escaped = '' | ||||
else: | ||||
# Pattern is a directory name. | ||||
escaped = util.re.escape(pat) + '/' | ||||
# Anything after the pattern must be a non-directory. | ||||
return '^' + escaped + '[^/]+$' | ||||
Mads Kiilerich
|
r21111 | if kind == 'relglob': | ||
return '(?:|.*/)' + _globre(pat) + globsuffix | ||||
if kind == 'relpath': | ||||
Siddharth Agarwal
|
r21915 | return util.re.escape(pat) + '(?:/|$)' | ||
Mads Kiilerich
|
r21111 | if kind == 'relre': | ||
if pat.startswith('^'): | ||||
return pat | ||||
return '.*' + pat | ||||
return _globre(pat) + globsuffix | ||||
Matt Mackall
|
r8574 | |||
Durham Goode
|
def _buildmatch(ctx, kindpats, globsuffix, listsubrepos, root):
    '''Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs. The returned function
    accepts a file when any of the following accepts it: a subinclude
    matcher, the expanded filesets, or the regexp built from the remaining
    patterns. The returned regexp string covers those remaining patterns
    only, and is '' when there are none.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # matchers for subincludes are built lazily, on first use
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch
                if submatch(f[len(prefix):]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    fset, kindpats = _expandsets(kindpats, ctx, listsubrepos)
    if fset:
        matchfuncs.append(fset.__contains__)

    regex = ''
    if kindpats:
        regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(mf(f) for mf in matchfuncs)
Matt Mackall
|
r14675 | |||
Mads Kiilerich
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Raises error.Abort naming the offending pattern (and its source, when
    known) if a pattern does not compile."""
    try:
        alternatives = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        a = _buildregexmatch(kindpats[:half], globsuffix)[1]
        b = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: a(s) or b(s)
    except re.error:
        # compile the patterns one by one to find the invalid one
        for k, p, s in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(k, p, globsuffix))
            except re.error:
                if not s:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
                raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                  (s, k, p))
        raise error.Abort(_("invalid pattern"))
Matt Mackall
|
r8574 | |||
Rodrigo Damazio Bovendorp
|
r31013 | def _patternrootsanddirs(kindpats): | ||
'''Returns roots and directories corresponding to each pattern. | ||||
Mads Kiilerich
|
r21079 | |||
Rodrigo Damazio Bovendorp
|
r31013 | This calculates the roots and directories exactly matching the patterns and | ||
returns a tuple of (roots, dirs) for each. It does not return other | ||||
directories which may also need to be considered, like the parent | ||||
directories. | ||||
Mads Kiilerich
|
r21079 | ''' | ||
Matt Mackall
|
r8576 | r = [] | ||
Rodrigo Damazio Bovendorp
|
r31013 | d = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r8584 | if kind == 'glob': # find the non-glob prefix | ||
root = [] | ||||
Mads Kiilerich
|
r21111 | for p in pat.split('/'): | ||
Matt Mackall
|
r8584 | if '[' in p or '{' in p or '*' in p or '?' in p: | ||
break | ||||
root.append(p) | ||||
r.append('/'.join(root) or '.') | ||||
Rodrigo Damazio Bovendorp
|
r31013 | elif kind in ('relpath', 'path'): | ||
Mads Kiilerich
|
r21111 | r.append(pat or '.') | ||
Rodrigo Damazio Bovendorp
|
r31013 | elif kind in ('rootfilesin',): | ||
d.append(pat or '.') | ||||
Mads Kiilerich
|
r19107 | else: # relglob, re, relre | ||
Matt Mackall
|
r8576 | r.append('.') | ||
Rodrigo Damazio Bovendorp
|
r31013 | return r, d | ||
def _roots(kindpats):
    '''Return only the roots computed by _patternrootsanddirs().

    The exact-directory half of that function's result is discarded.
    '''
    return _patternrootsanddirs(kindpats)[0]
def _rootsanddirs(kindpats):
    '''Return (roots, dirs) for the patterns, including implied parents.

    roots are directories to match recursively; dirs are directories to
    match exactly (non-recursively). On top of what the patterns name
    directly, dirs also receives the parent directories of every root and
    of every exact directory, plus the repository root '.'.

    >>> _rootsanddirs(
    ...     [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
    (['g/h', 'g/h', '.'], ['g', '.'])
    >>> _rootsanddirs(
    ...     [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
    ([], ['g/h', '.', 'g', '.'])
    >>> _rootsanddirs(
    ...     [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
    (['r', 'p/p', '.'], ['p', '.'])
    >>> _rootsanddirs(
    ...     [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
    (['.', '.', '.'], ['.'])
    '''
    roots, exactdirs = _patternrootsanddirs(kindpats)

    # The parents have to be scanned to reach either the roots or the exact
    # directories, so they are listed as non-recursive directories too.
    dirparents = util.dirs(exactdirs)
    rootparents = util.dirs(roots)
    exactdirs.extend(dirparents)
    exactdirs.extend(rootparents)
    # util.dirs() does not include the root directory, so add it manually
    exactdirs.append('.')

    return roots, exactdirs
Matt Mackall
|
r8576 | |||
Rodrigo Damazio Bovendorp
|
def _explicitfiles(kindpats):
    '''Return the candidate explicit filenames named by the patterns.

    'rootfilesin' patterns can only name directories, never files, so they
    are dropped before the remaining patterns are handed to _roots().

    >>> _explicitfiles([('path', 'foo/bar', '')])
    ['foo/bar']
    >>> _explicitfiles([('rootfilesin', 'foo/bar', '')])
    []
    '''
    return _roots([kp for kp in kindpats if kp[0] != 'rootfilesin'])
Mads Kiilerich
|
r21111 | def _anypats(kindpats): | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Rodrigo Damazio Bovendorp
|
r31012 | if kind in ('glob', 're', 'relglob', 'relre', 'set', 'rootfilesin'): | ||
Matt Mackall
|
r8576 | return True | ||
Durham Goode
|
r25167 | |||
# Cache for the compiled comment-stripping regexp used by readpatternfile();
# it stays None until the first line containing '#' is parsed.
_commentre = None
Laurent Charignon
|
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    pattern        # pattern of the current default type

    filepath is the path of the pattern file to read.

    warn, if not false, is called with a message for every "syntax:" line
    that names an unknown syntax; such lines are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, line), where lineno is the 1-based line number and
    line is the pattern text once comments, trailing white space and any
    syntax prefix have been removed. This is useful to debug ignore
    patterns.
    '''
    global _commentre

    syntaxes = {'re': 'relre:', 'regexp': 'relre:', 'glob': 'relglob:',
                'include': 'include', 'subinclude': 'subinclude'}
    syntax = 'relre:'
    patterns = []

    # the with block closes the file even if parsing or warn() raises
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    # compiled once and cached at module level
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # change the default syntax for the following lines
                name = line[7:].strip()
                try:
                    syntax = syntaxes[name]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, name))
                continue

            # a line may override the default syntax with an explicit prefix,
            # written either in its short form ('glob:') or in its already
            # expanded form ('relglob:')
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns