match.py
1703 lines
| 53.2 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
Raphaël Gomès
|
r47575 | # Copyright 2008, 2009 Olivia Mackall <olivia@selenic.com> and others | ||
Martin Geisler
|
r8231 | # | ||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Gregory Szorc
|
r25958 | |||
Kyle Lippincott
|
r47634 | import bisect | ||
Gregory Szorc
|
r25958 | import copy | ||
spectral
|
r38990 | import itertools | ||
Gregory Szorc
|
r25958 | import os | ||
import re | ||||
from .i18n import _ | ||||
Gregory Szorc
|
r43355 | from .pycompat import open | ||
Gregory Szorc
|
r25958 | from . import ( | ||
Pulkit Goyal
|
r36067 | encoding, | ||
Pierre-Yves David
|
r26587 | error, | ||
Gregory Szorc
|
r25958 | pathutil, | ||
Georges Racinet
|
r42652 | policy, | ||
Augie Fackler
|
r36590 | pycompat, | ||
Gregory Szorc
|
r25958 | util, | ||
) | ||||
Augie Fackler
|
r43346 | from .utils import stringutil | ||
Matt Mackall
|
r6576 | |||
Raphaël Gomès
|
# Result of policy.importrust('dirstate'); code further down checks it
# against None before relying on it.
rustmod = policy.importrust('dirstate')

# Pattern kind names, i.e. the part before ':' in a 'kind:pattern' string.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'filepath',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Kinds whose pattern is canonicalized against cwd by _donormalize().
cwdrelativepatternkinds = (b'relpath', b'glob')

# Local alias so util.propertycache can be used as a bare decorator below.
propertycache = util.propertycache
Augie Fackler
|
r43346 | |||
Mads Kiilerich
|
def _rematcher(regex):
    """Compile ``regex`` with ``util.re`` and return a matching callable.

    The compiled object's ``test_match`` attribute is returned when it
    exists (facebook's re2 bindings provide it and it is slightly faster);
    otherwise the compiled object's ``match`` method is returned.
    """
    compiled = util.re.compile(regex)
    try:
        matcher = compiled.test_match
    except AttributeError:
        # plain regexp engine: fall back to the standard entry point
        matcher = compiled.match
    return matcher
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r44461 | def _expandsets(cwd, kindpats, ctx=None, listsubrepos=False, badfn=None): | ||
Yuya Nishihara
|
r38631 | '''Returns the kindpats list with the 'set' patterns expanded to matchers''' | ||
matchers = [] | ||||
Matt Mackall
|
r14675 | other = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Augie Fackler
|
r43347 | if kind == b'set': | ||
Yuya Nishihara
|
r41144 | if ctx is None: | ||
Augie Fackler
|
r43346 | raise error.ProgrammingError( | ||
Martin von Zweigbergk
|
r43387 | b"fileset expression with no context" | ||
Augie Fackler
|
r43346 | ) | ||
Matt Harbison
|
r44461 | matchers.append(ctx.matchfileset(cwd, pat, badfn=badfn)) | ||
Matt Harbison
|
r25122 | |||
if listsubrepos: | ||||
for subpath in ctx.substate: | ||||
Matt Harbison
|
r44461 | sm = ctx.sub(subpath).matchfileset(cwd, pat, badfn=badfn) | ||
Martin von Zweigbergk
|
r41824 | pm = prefixdirmatcher(subpath, sm, badfn=badfn) | ||
Yuya Nishihara
|
r38631 | matchers.append(pm) | ||
Matt Harbison
|
r25122 | |||
Matt Mackall
|
r14675 | continue | ||
Durham Goode
|
r25213 | other.append((kind, pat, source)) | ||
Yuya Nishihara
|
r38631 | return matchers, other | ||
Matt Mackall
|
r14675 | |||
Augie Fackler
|
r43346 | |||
Durham Goode
|
r25283 | def _expandsubinclude(kindpats, root): | ||
Augie Fackler
|
r46554 | """Returns the list of subinclude matcher args and the kindpats without the | ||
subincludes in it.""" | ||||
Durham Goode
|
r25283 | relmatchers = [] | ||
other = [] | ||||
for kind, pat, source in kindpats: | ||||
Augie Fackler
|
r43347 | if kind == b'subinclude': | ||
Matt Harbison
|
r25301 | sourceroot = pathutil.dirname(util.normpath(source)) | ||
Durham Goode
|
r25283 | pat = util.pconvert(pat) | ||
path = pathutil.join(sourceroot, pat) | ||||
newroot = pathutil.dirname(path) | ||||
Augie Fackler
|
r43347 | matcherargs = (newroot, b'', [], [b'include:%s' % path]) | ||
Durham Goode
|
r25283 | |||
prefix = pathutil.canonpath(root, root, newroot) | ||||
if prefix: | ||||
Augie Fackler
|
r43347 | prefix += b'/' | ||
Durham Goode
|
r32132 | relmatchers.append((prefix, matcherargs)) | ||
Durham Goode
|
r25283 | else: | ||
other.append((kind, pat, source)) | ||||
return relmatchers, other | ||||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r24447 | def _kindpatsalwaysmatch(kindpats): | ||
Matt Harbison
|
r46558 | """Checks whether the kindspats match everything, as e.g. | ||
Martin von Zweigbergk
|
r24447 | 'relpath:.' does. | ||
""" | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Augie Fackler
|
r43347 | if pat != b'' or kind not in [b'relpath', b'glob']: | ||
Martin von Zweigbergk
|
r24447 | return False | ||
return True | ||||
Augie Fackler
|
r43346 | |||
def _buildkindpatsmatcher(
    matchercls,
    root,
    cwd,
    kindpats,
    ctx=None,
    listsubrepos=False,
    badfn=None,
):
    """Build a single matcher out of ``kindpats``.

    'set' patterns are expanded to fileset matchers by ``_expandsets()``;
    whatever is left is handed to ``matchercls(root, kindpats, badfn=...)``.

    Returns a ``nevermatcher`` when nothing was produced, the sole matcher
    when there is exactly one, and a ``unionmatcher`` of all of them
    otherwise (the ``matchercls`` instance first, fileset matchers after).
    """
    filesetmatchers, plainkindpats = _expandsets(
        cwd,
        kindpats,
        ctx=ctx,
        listsubrepos=listsubrepos,
        badfn=badfn,
    )

    matchers = []
    if plainkindpats:
        matchers.append(matchercls(root, plainkindpats, badfn=badfn))
    matchers.extend(filesetmatchers)

    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) > 1:
        return unionmatcher(matchers)
    return matchers[0]
Augie Fackler
|
r43346 | |||
def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'filepath:<path>' - an exact path to a single file, relative to the
                        repository root
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [br're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='[^/]*\\.py$|foo/a(?:/|$)|.*\\.c$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [br're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [br'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [br're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    # Join cwd onto root; os.path.join() leaves an absolute cwd unchanged.
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # This path dereferences ctx, so ctx must be provided with icasefs.
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        # Replacement for _donormalize that additionally maps every
        # non-regex pattern through dirstate.normalize().
        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                cwd,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        # include patterns always default to the 'glob' kind, and the
        # resulting matcher is built without the caller's badfn.
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        # exclude patterns are compiled like include patterns, then
        # subtracted from the matcher built so far.
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            cwd,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m
Martin von Zweigbergk
|
r32394 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def exact(files, badfn=None):
    """Return an ``exactmatcher`` built from ``files``, forwarding ``badfn``."""
    return exactmatcher(files, badfn=badfn)
Martin von Zweigbergk
|
r32394 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def always(badfn=None):
    """Return an ``alwaysmatcher``, forwarding ``badfn``."""
    return alwaysmatcher(badfn)
Martin von Zweigbergk
|
r32394 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def never(badfn=None):
    """Return a ``nevermatcher``, forwarding ``badfn``."""
    return nevermatcher(badfn)
Siddharth Agarwal
|
r32600 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The matcher passed in is left untouched; only the copy gets the new
    callback.
    """
    clone = copy.copy(match)
    clone.bad = badfn
    return clone
Augie Fackler
|
r43346 | |||
Denis Laxalde
|
def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    """Turn 'kind:pat' strings into ``(kind, pat, source)`` tuples.

    Every pattern is split with ``_patsplit(p, default)`` and then handled
    according to its kind:

    - kinds in ``cwdrelativepatternkinds`` are canonicalized against
      ``root``/``cwd`` (using ``auditor``);
    - 'relglob', 'path', 'filepath', 'rootfilesin' and 'rootglob' go
      through ``util.normpath``;
    - 'listfile'/'listfile0' are replaced by the normalized patterns read
      from the named file, with that file name as their source;
    - 'include' is replaced by the normalized patterns of the named
      pattern file (read relative to ``root``);
    - anything else (re, relre, ...) is kept as is.

    Raises ``error.Abort`` when a list file cannot be read or when reading
    an include file aborts. An include file that raises ``IOError`` is
    skipped, after calling ``warn`` if one was given.
    """
    result = []
    normpath_kinds = (
        b'relglob',
        b'path',
        b'filepath',
        b'rootfilesin',
        b'rootglob',
    )
    # All patterns are split up front, before any of them is processed.
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in normpath_kinds:
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                data = util.readfile(pat)
                if kind == b'listfile0':
                    entries = data.split(b'\0')
                else:
                    entries = data.splitlines()
                entries = [e for e in entries if e]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            nested = _donormalize(entries, default, root, cwd, auditor, warn)
            # patterns coming from a list file are attributed to that file
            result.extend((k, p, pat) for k, p, _src in nested)
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                nested = _donormalize(
                    includepats, default, root, cwd, auditor, warn
                )
                # keep a more specific nested source when there is one
                result.extend((k, p, src or pat) for k, p, src in nested)
            except error.Abort as inst:
                raise error.Abort(b'%s: %s' % (pat, inst.message))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        result.append((kind, pat, b''))
    return result
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
class basematcher:
    """Base class of the matchers in this module.

    Calling an instance with a file name delegates to ``matchfn()``. The
    defaults defined here match nothing, report no explicit files and ask
    for every directory to be visited; subclasses override what they need.
    """

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        """Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message."""

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        """Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots"""
        return self._files

    @propertycache
    def _fileset(self):
        # set view of _files, used for membership tests in exact()
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        """Return whether ``f`` matches; the base implementation never does."""
        return False

    def visitdir(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        """
        return True

    def visitchildrenset(self, dir):
        """Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        """
        return b'this'

    def always(self):
        """Matcher will match everything and .files() will be empty --
        optimization might be possible."""
        return False

    def isexact(self):
        """Matcher will match exactly the list of files in .files() --
        optimization might be possible."""
        return False

    def prefix(self):
        """Matcher will match the paths in .files() recursively --
        optimization might be possible."""
        return False

    def anypats(self):
        """None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult."""
        return not self.always() and not self.isexact() and not self.prefix()
Martin von Zweigbergk
|
r32454 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
class alwaysmatcher(basematcher):
    """Matcher that accepts every file and visits every directory."""

    def __init__(self, badfn=None):
        super().__init__(badfn)

    def always(self):
        """This matcher matches everything."""
        return True

    def matchfn(self, f):
        """Every file name matches."""
        return True

    def visitdir(self, dir):
        """Every directory is visited, recursively."""
        return b'all'

    def visitchildrenset(self, dir):
        """Every child of every directory is visited."""
        return b'all'

    def __repr__(self):
        return '<alwaysmatcher>'
Martin von Zweigbergk
|
r32553 | |||
Augie Fackler
|
r43346 | |||
Siddharth Agarwal
|
class nevermatcher(basematcher):
    """Matcher that rejects every file and visits no directory."""

    def __init__(self, badfn=None):
        super().__init__(badfn)

    # Calling the nevermatcher an exact matcher or a prefix matcher is a bit
    # odd, yet it lets callers take the fast paths tied to either property.
    # files() returns [], so there are neither exact matches nor prefixes,
    # and fast paths that iterate over them stay efficient (and correct).
    def isexact(self):
        """Reported as exact so callers can use their exact fast path."""
        return True

    def prefix(self):
        """Reported as prefix so callers can use their prefix fast path."""
        return True

    def visitdir(self, dir):
        """No directory needs to be visited."""
        return False

    def visitchildrenset(self, dir):
        """No child of any directory needs to be visited."""
        return set()

    def __repr__(self):
        return '<nevermatcher>'
Siddharth Agarwal
|
r32600 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
class predicatematcher(basematcher):
    """Matcher that wraps a plain boolean function of the file name."""

    def __init__(self, predfn, predrepr=None, badfn=None):
        super().__init__(badfn)
        # the predicate itself serves as the match function
        self.matchfn = predfn
        self._predrepr = predrepr

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate function when it renders as empty.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the "predicatenmatcher" spelling is emitted at
        # runtime and is kept as is here.
        return b'<predicatenmatcher pred=%s>' % described
Yuya Nishihara
|
r38596 | |||
Augie Fackler
|
r43346 | |||
Kyle Lippincott
|
def path_or_parents_in_set(path, prefix_set):
    """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
    size = len(prefix_set)
    if not size:
        return False
    if path in prefix_set:
        return True

    # With more than 5 entries in prefix_set it is *probably* cheaper to walk
    # up the directory hierarchy of `path`: hierarchies tend to be shallow
    # and hash lookups are cheap.
    if size > 5:
        for parentdir in pathutil.finddirs(path):
            if parentdir in prefix_set:
                return True
        return False

    # FIXME: Ideally we'd never get to this point if this is the case - we'd
    # recognize ourselves as an 'always' matcher and skip this.
    if b'' in prefix_set:
        return True

    slash = ord(b'/')
    for prefix in prefix_set:
        # `path` itself is not in prefix_set (checked above), so whenever it
        # starts with `prefix` it is strictly longer and the index is valid.
        if path.startswith(prefix) and path[len(prefix)] == slash:
            return True
    return False
Martin von Zweigbergk
|
class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    [b'', b'foo/a', b'', b'b']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        # NOTE: this sorts the list object received from the caller, in place
        kindpats.sort()

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self._matchfn = _buildmatch(kindpats, b'$', root)

    def matchfn(self, fn):
        """Match explicitly listed files first, then the compiled patterns."""
        return fn in self._fileset or self._matchfn(fn)

    @propertycache
    def _dirs(self):
        # directories leading to the explicitly listed files
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        """See basematcher.visitdir()."""
        if self._prefix and dir in self._fileset:
            return b'all'
        if dir in self._dirs:
            return True
        return path_or_parents_in_set(dir, self._fileset)

    def visitchildrenset(self, dir):
        """Translate the visitdir() answer into visitchildrenset() terms."""
        verdict = self.visitdir(dir)
        if verdict is True:
            return b'this'
        if not verdict:
            return set()
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        """Whether the patterns are all recursive path prefixes."""
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r32501 | |||
r43923 | # This is basically a reimplementation of pathutil.dirs that stores the | |||
# children instead of just a count of them, plus a small optional optimization | ||||
# to avoid some directories we don't need. | ||||
Gregory Szorc
|
r49801 | class _dirchildren: | ||
Kyle Lippincott
|
r39494 | def __init__(self, paths, onlyinclude=None): | ||
self._dirs = {} | ||||
self._onlyinclude = onlyinclude or [] | ||||
addpath = self.addpath | ||||
for f in paths: | ||||
addpath(f) | ||||
def addpath(self, path): | ||||
Augie Fackler
|
r43347 | if path == b'': | ||
Kyle Lippincott
|
r39494 | return | ||
dirs = self._dirs | ||||
findsplitdirs = _dirchildren._findsplitdirs | ||||
for d, b in findsplitdirs(path): | ||||
if d not in self._onlyinclude: | ||||
continue | ||||
dirs.setdefault(d, set()).add(b) | ||||
@staticmethod | ||||
def _findsplitdirs(path): | ||||
# yields (dirname, basename) tuples, walking back to the root. This is | ||||
Martin von Zweigbergk
|
r44032 | # very similar to pathutil.finddirs, except: | ||
Kyle Lippincott
|
r39494 | # - produces a (dirname, basename) tuple, not just 'dirname' | ||
# Unlike manifest._splittopdir, this does not suffix `dirname` with a | ||||
Martin von Zweigbergk
|
r42528 | # slash. | ||
Kyle Lippincott
|
r39494 | oldpos = len(path) | ||
Augie Fackler
|
r43347 | pos = path.rfind(b'/') | ||
Kyle Lippincott
|
r39494 | while pos != -1: | ||
Augie Fackler
|
r43346 | yield path[:pos], path[pos + 1 : oldpos] | ||
Kyle Lippincott
|
r39494 | oldpos = pos | ||
Augie Fackler
|
r43347 | pos = path.rfind(b'/', 0, pos) | ||
yield b'', path[:oldpos] | ||||
Kyle Lippincott
|
r39494 | |||
def get(self, path): | ||||
return self._dirs.get(path, set()) | ||||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
class includematcher(basematcher):
    """Matcher built from a list of kindpats via ``_buildmatch``.

    The match function is built with the glob suffix ``(?:/|$)``.  The
    constructor also records three groups of directories, used to answer
    ``visitdir`` / ``visitchildrenset``:

    - ``_roots``: directories which are recursively included,
    - ``_dirs``: directories which are non-recursively included,
    - ``_parents``: directories needed to reach items in the other two.
    """

    def __init__(self, root, kindpats, badfn=None):
        super().__init__(badfn)
        if rustmod is not None:
            # We need to pass the patterns to Rust because they can contain
            # patterns from the user interface
            self._kindpats = kindpats
        pats, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._pats = pats
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def visitdir(self, dir):
        """Return b'all', or a truthy/falsy value telling whether to visit."""
        if self._prefix and dir in self._roots:
            return b'all'
        if dir in self._dirs or dir in self._parents:
            return True
        return path_or_parents_in_set(dir, self._roots)

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or a (possibly empty) set of child names."""
        if self._prefix and dir in self._roots:
            return b'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if (
            b'' in self._roots
            or dir in self._dirs
            or path_or_parents_in_set(dir, self._roots)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        # A pure parent directory: only the children leading towards an
        # included item need visiting.
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r32406 | |||
Martin von Zweigbergk
|
class exactmatcher(basematcher):
    r"""Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    """

    def __init__(self, files, badfn=None):
        super().__init__(badfn)

        # Keep the caller's list as-is; copy any other iterable into a list.
        self._files = files if isinstance(files, list) else list(files)

    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(pathutil.dirs(self._fileset))

    def visitdir(self, dir):
        return dir in self._dirs

    @propertycache
    def _visitchildrenset_candidates(self):
        """A memoized set of candidates for visitchildrenset."""
        # '-' binds tighter than '|': the root is removed from the
        # directories only, then the files are merged in.
        return self._fileset | (self._dirs - {b''})

    @propertycache
    def _sorted_visitchildrenset_candidates(self):
        """A memoized sorted list of candidates for visitchildrenset."""
        return sorted(self._visitchildrenset_candidates)

    def visitchildrenset(self, dir):
        """Return the set of immediate children of ``dir`` worth visiting."""
        if not self._fileset or dir not in self._dirs:
            return set()

        if dir == b'':
            candidates = self._visitchildrenset_candidates
        else:
            ordered = self._sorted_visitchildrenset_candidates
            prefix = dir + b'/'
            # Use bisect to find the first element potentially starting with
            # prefix (i.e. >= prefix). This should always find at least one
            # element (we'll assert later if this is not the case).
            lo = bisect.bisect_left(ordered, prefix)
            # We need a representation of the first element that is > prefix
            # that does not start with prefix, so since we added a `/` on the
            # end of dir, we'll add whatever comes after slash (we could
            # probably assume that `0` is after `/`, but let's not) to the end
            # of dir instead.
            upper = dir + encoding.strtolocal(chr(ord(b'/') + 1))
            # Use bisect to find the first element >= upper
            hi = bisect.bisect_left(ordered, upper, lo=lo)
            skip = len(prefix)
            candidates = {entry[skip:] for entry in ordered[lo:hi]}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        ret = {c for c in candidates if b'/' not in c}
        # We really do not expect ret to be empty, since that would imply that
        # there's something in _dirs that didn't have a file in _fileset.
        assert ret
        return ret

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r32499 | |||
Martin von Zweigbergk
|
class differencematcher(basematcher):
    """Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    """

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # bad and traversedir are taken from the first matcher only.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        """Tell whether ``dir`` needs visiting (False, True or b'all')."""
        # Ask m2 only once; the answer is needed for both tests below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            # m2 matches everything under dir, so nothing is left for us.
            return False
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        # m2 may exclude something below dir: never report 'all'.
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        """Return b'all', b'this', or a set of child names to visit."""
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r32465 | |||
Martin von Zweigbergk
|
def intersectmatchers(m1, m2):
    """Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.

    If either argument is None the result is ``m1 or m2``.  If one matcher
    reports ``always()``, a shallow copy of the other is returned rather than
    a new intersectionmatcher.
    """
    if m1 is None or m2 is None:
        return m1 or m2
    if m1.always():
        merged = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        merged.bad = m1.bad
        merged.traversedir = m1.traversedir
        return merged
    if m2.always():
        # m1's own bad/traversedir are already the ones we want.
        return copy.copy(m1)
    return intersectionmatcher(m1, m2)
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
class intersectionmatcher(basematcher):
    """Matcher that matches a file only when both wrapped matchers do.

    bad and traversedir are taken from the first matcher.
    """

    def __init__(self, m1, m2):
        super().__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: filter its files through the other.
        if self._m1.isexact():
            exact, other = self._m1, self._m2
        else:
            exact, other = self._m2, self._m1
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        if b'this' in (m1_set, m2_set):
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set & m2_set

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r32497 | |||
Martin von Zweigbergk
|
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'], auditor=lambda name: None)
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super().__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files below path, with the
        # leading "path/" stripped.
        below = path + b"/"
        strip = len(path) + 1
        self._files = [
            f[strip:] for f in matcher._files if f.startswith(below)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def _inparent(self, dir):
        # Translate a directory relative to the subdirectory into the
        # wrapped matcher's namespace; b'' is the subdirectory itself.
        if dir == b'':
            return self._path
        return self._path + b"/" + dir

    def bad(self, f, msg):
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        return self._matcher.visitdir(self._inparent(dir))

    def visitchildrenset(self, dir):
        return self._matcher.visitchildrenset(self._inparent(dir))

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )
Martin von Zweigbergk
|
r32552 | |||
Yuya Nishihara
|
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super().__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._path = path
        self._pathprefix = path + b'/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + f for f in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        # Anything outside the prefix directory can never match.
        return False

    @propertycache
    def _pathdirs(self):
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        if dir == self._path:
            # The prefix directory itself is the wrapped matcher's root.
            return self._matcher.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        if dir not in self._pathdirs:
            return set()
        return b'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return b'<prefixdirmatcher path=%r, matcher=%r>' % (
            pycompat.bytestr(self._path),
            self._matcher,
        )
Yuya Nishihara
|
r38630 | |||
Gregory Szorc
|
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super().__init__()
        # NOTE(review): only traversedir is copied from the first matcher
        # here; bad is not assigned in this constructor -- confirm against
        # the class docstring.
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        # True as soon as any of the matchers accepts f.
        return any(match(f) for match in self._matchers)

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                # Nothing can be stronger than 'all'.
                return v
            result = result | v
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if sawthis or v == b'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            children = children.union(v)
        return b'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return b'<unionmatcher matchers=%r>' % self._matchers
Augie Fackler
|
r43346 | |||
Gregory Szorc
|
r33319 | |||
Mads Kiilerich
|
def patkind(pattern, default=None):
    r"""If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    """
    kind, _pat = _patsplit(pattern, default)
    return kind
Matt Mackall
|
r8570 | |||
Augie Fackler
|
r43346 | |||
Mads Kiilerich
|
def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ``:`` is one of
    ``allpatternkinds``; otherwise ``(default, pattern)`` with the pattern
    left untouched.
    """
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern
Matt Mackall
|
r8570 | |||
Augie Fackler
|
r43346 | |||
Matt Mackall
|
def _globre(pat):
    r"""Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    """
    pos, end = 0, len(pat)
    # Regexp fragments, joined once at the end.
    pieces = []
    # Number of currently open '{' groups.
    depth = 0
    escape = util.stringutil.regexbytesescapemap.get

    def peek():
        # Next unread byte (as a length-1 bytes), or False at end of input.
        return pos < end and pat[pos : pos + 1]

    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch not in b'*?[{},\\':
            pieces.append(escape(ch, ch))
        elif ch == b'*':
            if peek() == b'*':
                pos += 1
                if peek() == b'/':
                    # '**/' may also match no directory at all.
                    pos += 1
                    pieces.append(b'(?:.*/)?')
                else:
                    pieces.append(b'.*')
            else:
                pieces.append(b'[^/]*')
        elif ch == b'?':
            pieces.append(b'.')
        elif ch == b'[':
            # Look for the closing ']'; a ']' (or '!') right after the
            # opening bracket is part of the class.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # Unterminated class: treat '[' as a literal.
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                if stuff[0:1] == b'!':
                    stuff = b'^' + stuff[1:]
                elif stuff[0:1] == b'^':
                    stuff = b'\\' + stuff
                pieces.append(b'[%s]' % stuff)
        elif ch == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif ch == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            pieces.append(b'|')
        elif ch == b'\\':
            following = peek()
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(ch, ch))
        else:
            # '}' or ',' outside of any group: literal.
            pieces.append(escape(ch, ch))
    return b''.join(pieces)
Matt Mackall
|
r8570 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
# Matches a pattern that starts with an inline flag group such as ``(?i)``:
# group 1 is the run of flag letters, group 2 is what follows the group.
FLAG_RE = util.re.compile(br'^\(\?([aiLmsux]+)\)(.*)')
r50498 | ||||
Mads Kiilerich
|
def _regex(kind, pat, globsuffix):
    """Convert a (normalized) pattern of any kind into a
    regular expression.

    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for ``filepath`` patterns and for kinds
    that have no regexp form.
    """
    if not pat and kind in (b'glob', b'relpath'):
        return b''
    if kind == b're':
        return pat
    if kind == b'filepath':
        raise error.ProgrammingError(
            "'filepath:' patterns should not be converted to a regex"
        )
    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'
    if kind == b'rootfilesin':
        # Pattern is a directory name ('.' yields no directory prefix).
        if pat == b'.':
            dirprefix = b''
        else:
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'
    if kind == b'relglob':
        globre = _globre(pat)
        star = b'[^/]*'
        if globre.startswith(star):
            # When pat has the form *XYZ (common), make the returned regex more
            # legible by returning the regex for **XYZ instead of **/*XYZ.
            return b'.*' + globre[len(star) :] + globsuffix
        return b'(?:|.*/)' + globre + globsuffix
    if kind == b'relre':
        # Peel off a leading inline flag group so that '.*' can be inserted
        # in front of the pattern proper, then wrap both in a scoped flag
        # group.
        flag = None
        flagmatch = FLAG_RE.match(pat)
        if flagmatch:
            flag, pat = flagmatch.groups()
        if not pat.startswith(b'^'):
            pat = b'.*' + pat
        if flag is None:
            return pat
        return br'(?%s:%s)' % (flag, pat)
    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix
    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
Matt Mackall
|
r8574 | |||
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
def _buildmatch(kindpats, globsuffix, root):
    """Return regexp string and a matcher function for kindpats.

    globsuffix is appended to the regexp of globs.

    Subincludes are split off first and handled by their own matchers, built
    on first use.  If every remaining pattern is ``rootfilesin``, a
    set-membership test is used instead of a regexp.
    """
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                # Build each sub-matcher only the first time it is needed.
                if submatchers.get(prefix) is None:
                    submatchers[prefix] = match(*matcherargs)
                if submatchers[prefix](f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if all(k == b'rootfilesin' for k, p, s in kindpats):
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                # Files at the top level are in directory b'.'.
                parent, slash, _name = f.rpartition(b'/')
                if not slash:
                    parent = b'.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
        else:
            regex, mf = _buildregexmatch(kindpats, globsuffix)
        matchfuncs.append(mf)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]

    def matchany(f):
        return any(mf(f) for mf in matchfuncs)

    return regex, matchany
Matt Mackall
|
r14675 | |||
Augie Fackler
|
r43346 | |||
Boris Feld
|
# Size limit, in bytes, used by _buildregexmatch: a single pattern whose
# regexp is longer than this aborts, and the joined patterns are split into
# several regexps once their accumulated size would go past it.
MAX_RE_SIZE = 20000
Augie Fackler
|
r43346 | |||
Boris Feld
|
r40812 | def _joinregexes(regexps): | ||
"""gather multiple regular expressions into a single one""" | ||||
Augie Fackler
|
r43347 | return b'|'.join(regexps) | ||
Boris Feld
|
r40812 | |||
Augie Fackler
|
r43346 | |||
Mads Kiilerich
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    ``filepath`` patterns are matched by set membership and are not part of
    the regexp.  Invalid patterns are reported through error.Abort.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        # 'filepath' patterns are exact names; everything else becomes a
        # regexp piece, in the original order.
        exact = {p for k, p, _source in kindpats if k == b'filepath'}
        regexps = [
            _regex(k, p, globsuffix)
            for k, p, _source in kindpats
            if k != b'filepath'
        ]

        fullregexp = _joinregexes(regexps)

        # Cut the pieces into groups whose joined size stays around
        # MAX_RE_SIZE; startidx is where the current group begins.
        allgroups = []
        startidx = 0
        groupsize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                allgroups.append(_joinregexes(regexps[startidx:idx]))
                startidx = idx
                groupsize = 0
            # +1 accounts for the '|' separator.
            groupsize += piecesize + 1

        if startidx == 0:
            # Everything fit into a single regexp.
            matcher = _rematcher(fullregexp)

            def func(s):
                return bool(matcher(s))

        else:
            allgroups.append(_joinregexes(regexps[startidx:]))
            allmatchers = [_rematcher(g) for g in allgroups]

            def func(s):
                return any(m(s) for m in allmatchers)

        if not exact:
            actualfunc = func
        elif not regexps:
            # An empty regex will always match, so only call the regex if
            # there were any actual patterns to match.
            def actualfunc(s):
                return s in exact

        else:

            def actualfunc(s):
                return s in exact or func(s)

        return fullregexp, actualfunc
    except re.error:
        # Compile the patterns one at a time to find out which one is bad.
        for k, p, s in kindpats:
            if k == b'filepath':
                continue
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(
                        _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                    )
                else:
                    raise error.Abort(_(b"invalid pattern (%s): %s") % (k, p))
        raise error.Abort(_(b"invalid pattern"))
Matt Mackall
|
r8574 | |||
Augie Fackler
|
r43346 | |||
Rodrigo Damazio Bovendorp
|
r31013 | def _patternrootsanddirs(kindpats): | ||
Augie Fackler
|
r46554 | """Returns roots and directories corresponding to each pattern. | ||
Mads Kiilerich
|
r21079 | |||
Rodrigo Damazio Bovendorp
|
r31013 | This calculates the roots and directories exactly matching the patterns and | ||
returns a tuple of (roots, dirs) for each. It does not return other | ||||
directories which may also need to be considered, like the parent | ||||
directories. | ||||
Augie Fackler
|
r46554 | """ | ||
Matt Mackall
|
r8576 | r = [] | ||
Rodrigo Damazio Bovendorp
|
r31013 | d = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Augie Fackler
|
r43347 | if kind in (b'glob', b'rootglob'): # find the non-glob prefix | ||
Matt Mackall
|
r8584 | root = [] | ||
Augie Fackler
|
r43347 | for p in pat.split(b'/'): | ||
if b'[' in p or b'{' in p or b'*' in p or b'?' in p: | ||||
Matt Mackall
|
r8584 | break | ||
root.append(p) | ||||
Augie Fackler
|
r43347 | r.append(b'/'.join(root)) | ||
Raphaël Gomès
|
r51588 | elif kind in (b'relpath', b'path', b'filepath'): | ||
Augie Fackler
|
r43347 | if pat == b'.': | ||
pat = b'' | ||||
Martin von Zweigbergk
|
r42528 | r.append(pat) | ||
Augie Fackler
|
r43347 | elif kind in (b'rootfilesin',): | ||
if pat == b'.': | ||||
pat = b'' | ||||
Martin von Zweigbergk
|
r42528 | d.append(pat) | ||
Augie Fackler
|
r43346 | else: # relglob, re, relre | ||
Augie Fackler
|
r43347 | r.append(b'') | ||
Rodrigo Damazio Bovendorp
|
r31013 | return r, d | ||
Augie Fackler
|
r43346 | |||
Rodrigo Damazio Bovendorp
|
def _roots(kindpats):
    """Return the root directories the given patterns match recursively."""
    return _patternrootsanddirs(kindpats)[0]
Augie Fackler
|
r43346 | |||
spectral
|
def _rootsdirsandparents(kindpats):
    """Return the roots, exact directories and parents for the patterns.

    The result is a ``(roots, dirs, parents)`` tuple: ``roots`` are
    directories to match recursively, ``dirs`` are directories to match
    non-recursively, and ``parents`` is the set of directories that must be
    walked in order to reach anything in ``roots`` or ``dirs``.

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    """
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are added as non-recursive/exact directories because they
    # must be scanned to reach either the roots or the other exact
    # directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
Matt Mackall
|
r8576 | |||
Augie Fackler
|
r43346 | |||
Rodrigo Damazio Bovendorp
|
def _explicitfiles(kindpats):
    """Return the file names that the patterns may name explicitly.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    """
    # ``rootfilesin`` can only name directories, never files, so drop those
    # patterns and keep the kinds that are able to specify a file name.
    filable = [kp for kp in kindpats if kp[0] != b'rootfilesin']
    return _roots(filable)
Augie Fackler
|
r43346 | |||
Martin von Zweigbergk
|
r33405 | def _prefix(kindpats): | ||
'''Whether all the patterns match a prefix (i.e. recursively)''' | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Augie Fackler
|
r43347 | if kind not in (b'path', b'relpath'): | ||
Martin von Zweigbergk
|
r33405 | return False | ||
return True | ||||
Durham Goode
|
r25167 | |||
Augie Fackler
|
r43346 | |||
Durham Goode
|
# Cache for the comment-stripping regexp used by readpatternfile(); it is
# compiled there on first use and reused on later calls.
_commentre = None
Augie Fackler
|
r43346 | |||
Laurent Charignon
|
def readpatternfile(filepath, warn, sourceinfo=False):
    """Parse a pattern file and return the list of patterns it contains.

    The returned patterns should be given to compile() to be validated and
    converted into a match function.

    Parsing rules:

    - trailing white space is dropped.
    - the escape character is backslash.
    - comments start with ``#``; ``\\#`` yields a literal ``#``.
    - empty lines are skipped.

    Lines can be of the following formats::

        syntax: regexp # defaults following lines to non-rooted regexps
        syntax: glob   # defaults following lines to non-rooted globs
        re:pattern     # non-rooted regular expression
        glob:pattern   # non-rooted glob
        rootglob:pat   # rooted glob (same root as ^ in regexps)
        pattern        # pattern of the current default type

    The default type at the start of the file is non-rooted regexp.

    ``filepath`` is the path of the pattern file to read.

    ``warn`` is an optional callable taking a message; when it is truthy it
    is called for every ``syntax:`` line naming an unknown syntax (such lines
    are otherwise ignored).

    If ``sourceinfo`` is set, a list of ``(pattern, lineno, line)`` tuples is
    returned instead, where ``lineno`` is 1-based and ``line`` is the line
    text after comments, trailing white space and any syntax prefix have
    been removed.  This is useful to debug ignore patterns.
    """
    global _commentre

    syntaxes = {
        b're': b'relre:',
        b'regexp': b'relre:',
        b'glob': b'relglob:',
        b'rootglob': b'rootglob:',
        b'include': b'include',
        b'subinclude': b'subinclude',
    }
    syntax = b'relre:'
    patterns = []

    # Use a context manager so the file is closed even when reading the file
    # or the ``warn`` callback raises.
    with open(filepath, b'rb') as fp:
        for lineno, line in enumerate(fp, start=1):
            if b"#" in line:
                if not _commentre:
                    _commentre = util.re.compile(
                        br'((?:^|[^\\])(?:\\\\)*)#.*'
                    )
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[: m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace(b"\\#", b"#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith(b'syntax:'):
                # A "syntax:" line only switches the default kind; it never
                # contributes a pattern of its own.
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(
                            _(b"%s: ignoring invalid syntax '%s'\n")
                            % (filepath, s)
                        )
                continue

            # A per-line prefix overrides the current default syntax.
            linesyntax = syntax
            for s, rels in syntaxes.items():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels) :]
                    break
                elif line.startswith(s + b':'):
                    linesyntax = rels
                    line = line[len(s) + 1 :]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns