match.py
1371 lines
| 45.5 KiB
| text/x-python
|
PythonLexer
/ mercurial / match.py
timeless
|
r8761 | # match.py - filename matching | ||
Martin Geisler
|
r8231 | # | ||
# Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
Martin Geisler
|
r8231 | |||
Yuya Nishihara
|
r34139 | from __future__ import absolute_import, print_function | ||
Gregory Szorc
|
r25958 | |||
import copy | ||||
spectral
|
r38990 | import itertools | ||
Gregory Szorc
|
r25958 | import os | ||
import re | ||||
from .i18n import _ | ||||
from . import ( | ||||
Pulkit Goyal
|
r36067 | encoding, | ||
Pierre-Yves David
|
r26587 | error, | ||
Gregory Szorc
|
r25958 | pathutil, | ||
Augie Fackler
|
r36590 | pycompat, | ||
Gregory Szorc
|
r25958 | util, | ||
) | ||||
Yuya Nishihara
|
r37102 | from .utils import ( | ||
stringutil, | ||||
) | ||||
Matt Mackall
|
r6576 | |||
Kostia Balytskyi
|
# Names of the pattern kinds, i.e. the part before ':' in a 'kind:pat'
# pattern string.
allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
                   'rootglob',
                   'listfile', 'listfile0', 'set', 'include', 'subinclude',
                   'rootfilesin')
# Kinds whose pattern is canonicalized relative to cwd by _donormalize().
cwdrelativepatternkinds = ('relpath', 'glob')

# Module-level alias so classes below can write @propertycache.
propertycache = util.propertycache
Mads Kiilerich
|
def _rematcher(regex):
    '''Compile ``regex`` and return a callable matching it against a string.

    The pattern is compiled through ``util.re``. When the compiled object
    exposes a ``test_match`` attribute that attribute is returned; otherwise
    the object's ``match`` method is returned.
    '''
    compiled = util.re.compile(regex)
    # test_match is slightly faster; it is provided by facebook's re2 bindings.
    # A private sentinel keeps the lookup equivalent to catching
    # AttributeError only.
    missing = object()
    matcher = getattr(compiled, 'test_match', missing)
    if matcher is missing:
        return compiled.match
    return matcher
Martin von Zweigbergk
|
r41824 | def _expandsets(kindpats, ctx, listsubrepos, badfn): | ||
Yuya Nishihara
|
r38631 | '''Returns the kindpats list with the 'set' patterns expanded to matchers''' | ||
matchers = [] | ||||
Matt Mackall
|
r14675 | other = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Matt Mackall
|
r14675 | if kind == 'set': | ||
Yuya Nishihara
|
r41144 | if ctx is None: | ||
Martin von Zweigbergk
|
r32444 | raise error.ProgrammingError("fileset expression with no " | ||
"context") | ||||
Yuya Nishihara
|
r38631 | matchers.append(ctx.matchfileset(pat, badfn=badfn)) | ||
Matt Harbison
|
r25122 | |||
if listsubrepos: | ||||
for subpath in ctx.substate: | ||||
Yuya Nishihara
|
r38631 | sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn) | ||
Martin von Zweigbergk
|
r41824 | pm = prefixdirmatcher(subpath, sm, badfn=badfn) | ||
Yuya Nishihara
|
r38631 | matchers.append(pm) | ||
Matt Harbison
|
r25122 | |||
Matt Mackall
|
r14675 | continue | ||
Durham Goode
|
r25213 | other.append((kind, pat, source)) | ||
Yuya Nishihara
|
r38631 | return matchers, other | ||
Matt Mackall
|
r14675 | |||
Durham Goode
|
r25283 | def _expandsubinclude(kindpats, root): | ||
Durham Goode
|
r32132 | '''Returns the list of subinclude matcher args and the kindpats without the | ||
Durham Goode
|
r25283 | subincludes in it.''' | ||
relmatchers = [] | ||||
other = [] | ||||
for kind, pat, source in kindpats: | ||||
if kind == 'subinclude': | ||||
Matt Harbison
|
r25301 | sourceroot = pathutil.dirname(util.normpath(source)) | ||
Durham Goode
|
r25283 | pat = util.pconvert(pat) | ||
path = pathutil.join(sourceroot, pat) | ||||
newroot = pathutil.dirname(path) | ||||
Durham Goode
|
r32132 | matcherargs = (newroot, '', [], ['include:%s' % path]) | ||
Durham Goode
|
r25283 | |||
prefix = pathutil.canonpath(root, root, newroot) | ||||
if prefix: | ||||
prefix += '/' | ||||
Durham Goode
|
r32132 | relmatchers.append((prefix, matcherargs)) | ||
Durham Goode
|
r25283 | else: | ||
other.append((kind, pat, source)) | ||||
return relmatchers, other | ||||
Martin von Zweigbergk
|
r24447 | def _kindpatsalwaysmatch(kindpats): | ||
""""Checks whether the kindspats match everything, as e.g. | ||||
'relpath:.' does. | ||||
""" | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Martin von Zweigbergk
|
r24447 | if pat != '' or kind not in ['relpath', 'glob']: | ||
return False | ||||
return True | ||||
Martin von Zweigbergk
|
def _buildkindpatsmatcher(matchercls, root, kindpats, ctx=None,
                          listsubrepos=False, badfn=None):
    '''Build one matcher for ``kindpats``.

    'set' patterns are expanded to fileset matchers by _expandsets(); the
    remaining patterns, if any, are handed to ``matchercls``. The result is a
    nevermatcher when nothing was built, the single matcher when exactly one
    was built, and a unionmatcher of all of them otherwise.
    '''
    filesetmatchers, plainkindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn)

    built = []
    if plainkindpats:
        built.append(matchercls(root, plainkindpats, badfn=badfn))
    built.extend(filesetmatchers)

    if not built:
        return nevermatcher(badfn=badfn)
    if len(built) == 1:
        return built[0]
    return unionmatcher(built)
Martin von Zweigbergk
|
def match(root, cwd, patterns=None, include=None, exclude=None, default='glob',
          auditor=None, ctx=None, listsubrepos=False, warn=None,
          badfn=None, icasefs=False):
    """build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - handed to the pattern normalization step
    ctx - context used to evaluate 'set:' patterns; with icasefs it is also
          used to reach the repository's dirstate
    listsubrepos - also evaluate 'set:' patterns in the subrepos of ctx
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
              normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type
    """
    if icasefs:
        dirstate = ctx.repo().dirstate
        foldcase = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            folded = []
            for kind, pat, source in _donormalize(patterns, default, root,
                                                  cwd, auditor, warn):
                if kind not in ('re', 'relre'):  # regex can't be normalized
                    original = pat
                    pat = foldcase(original)
                    # Preserve the original to handle a case only rename.
                    if original != pat and original in dirstate:
                        folded.append((kind, original, source))
                folded.append((kind, pat, source))
            return folded
    else:
        normalize = _donormalize

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(patternmatcher, root, kindpats, ctx=ctx,
                                      listsubrepos=listsubrepos, badfn=badfn)
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        includepats = normalize(include, 'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(includematcher, root, includepats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = intersectmatchers(m, im)
    if exclude:
        excludepats = normalize(exclude, 'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(includematcher, root, excludepats, ctx=ctx,
                                   listsubrepos=listsubrepos, badfn=None)
        m = differencematcher(m, em)
    return m
Martin von Zweigbergk
|
r32394 | |||
Martin von Zweigbergk
|
def exact(files, badfn=None):
    '''Return an exactmatcher for ``files`` using the optional ``badfn``.'''
    matcher = exactmatcher(files, badfn=badfn)
    return matcher
Martin von Zweigbergk
|
r32394 | |||
Martin von Zweigbergk
|
def always(badfn=None):
    '''Return an alwaysmatcher using the optional ``badfn``.'''
    return alwaysmatcher(badfn=badfn)
Martin von Zweigbergk
|
r32394 | |||
Martin von Zweigbergk
|
def never(badfn=None):
    '''Return a nevermatcher using the optional ``badfn``.'''
    return nevermatcher(badfn=badfn)
Siddharth Agarwal
|
r32600 | |||
Martin von Zweigbergk
|
def badmatch(match, badfn):
    """Return a shallow copy of ``match`` whose ``bad`` attribute is ``badfn``.

    The matcher passed in is left untouched.
    """
    clone = copy.copy(match)
    setattr(clone, 'bad', badfn)
    return clone
Martin von Zweigbergk
|
def _donormalize(patterns, default, root, cwd, auditor, warn):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of ``(kind, pat, source)`` tuples. ``source`` is '' for
    patterns given directly, the list file name for patterns read through
    'listfile'/'listfile0', and the include file name (or the nested source,
    when there is one) for patterns read through 'include'.

    Raises error.Abort when a list file cannot be read or when reading an
    included pattern file aborts. An unreadable include file (IOError) is
    skipped, after calling ``warn`` if it was provided.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
            pat = util.normpath(pat)
        elif kind in ('listfile', 'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == 'listfile0':
                    files = files.split('\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_("unable to read file list (%s)") % pat)
            # Entries read from the list file report the list file itself as
            # their source.
            for k, p, source in _donormalize(files, default, root, cwd,
                                             auditor, warn):
                kindpats.append((k, p, pat))
            continue
        elif kind == 'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(includepats, default,
                                                 root, cwd, auditor, warn):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # Exceptions cannot be indexed on Python 3; args[0] is the
                # same value and works on both Python 2 and 3.
                raise error.Abort('%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(_("skipping unreadable pattern file '%s': %s\n") %
                         (pat, stringutil.forcebytestr(inst.strerror)))
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, ''))
    return kindpats
Martin von Zweigbergk
|
class basematcher(object):
    '''Base class providing the default behaviour shared by all matchers.

    The defaults describe a matcher with no explicit files whose matchfn()
    rejects everything, while visitdir()/visitchildrenset() still ask the
    caller to visit every directory.
    '''

    def __init__(self, badfn=None):
        # Only shadow the class-level bad() when a callback was supplied.
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    def __iter__(self):
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If an traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

             visitdir | visitchildrenset
            ----------+-------------------
             False    | set()
             'all'    | 'all'
             True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '.' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return 'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not (self.always() or self.isexact() or self.prefix())
Martin von Zweigbergk
|
r32454 | |||
Martin von Zweigbergk
|
class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<alwaysmatcher>'

    def matchfn(self, f):
        # Every file matches.
        return True

    def always(self):
        return True

    def visitdir(self, dir):
        # Every directory and everything below it is of interest.
        return 'all'

    def visitchildrenset(self, dir):
        return 'all'
Martin von Zweigbergk
|
r32553 | |||
Siddharth Agarwal
|
class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    def __repr__(self):
        return r'<nevermatcher>'

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # Nothing can match, so no directory needs visiting.
        return False

    def visitchildrenset(self, dir):
        return set()
Siddharth Agarwal
|
r32600 | |||
Yuya Nishihara
|
class predicatematcher(basematcher):
    """A matcher adapter for a simple boolean function

    ``predfn`` becomes the instance's matchfn. ``predrepr`` is only used to
    build the repr; when it renders to something empty, the repr of
    ``predfn`` itself is used instead.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): 'predicatenmatcher' looks like a typo for
        # 'predicatematcher'; kept as is because it is runtime output.
        return '<predicatenmatcher pred=%s>' % described
Martin von Zweigbergk
|
class patternmatcher(basematcher):
    '''Matcher built from ``kindpats`` whose patterns are anchored with '$'.

    files() reports the result of _explicitfiles(kindpats); prefix() reports
    the result of _prefix(kindpats).
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        self._pats, self.matchfn = _buildmatch(kindpats, '$', root)

    @propertycache
    def _dirs(self):
        # Directories derived from the explicit files, plus the root '.'.
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        explicit = self._fileset
        if dir in explicit:
            # An explicitly listed directory is visited recursively only when
            # the patterns form a prefix matcher.
            return 'all' if self._prefix else True
        if '.' in explicit or dir in self._dirs:
            return True
        return any(parentdir in explicit
                   for parentdir in util.finddirs(dir))

    def visitchildrenset(self, dir):
        visit = self.visitdir(dir)
        if not visit:
            return set()
        if visit is True:
            return 'this'
        assert visit == 'all'
        return 'all'

    def prefix(self):
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return '<patternmatcher patterns=%r>' % pats
Martin von Zweigbergk
|
r32501 | |||
Kyle Lippincott
|
r39494 | # This is basically a reimplementation of util.dirs that stores the children | ||
# instead of just a count of them, plus a small optional optimization to avoid | ||||
# some directories we don't need. | ||||
class _dirchildren(object): | ||||
def __init__(self, paths, onlyinclude=None): | ||||
self._dirs = {} | ||||
self._onlyinclude = onlyinclude or [] | ||||
addpath = self.addpath | ||||
for f in paths: | ||||
addpath(f) | ||||
def addpath(self, path): | ||||
if path == '.': | ||||
return | ||||
dirs = self._dirs | ||||
findsplitdirs = _dirchildren._findsplitdirs | ||||
for d, b in findsplitdirs(path): | ||||
if d not in self._onlyinclude: | ||||
continue | ||||
dirs.setdefault(d, set()).add(b) | ||||
@staticmethod | ||||
def _findsplitdirs(path): | ||||
# yields (dirname, basename) tuples, walking back to the root. This is | ||||
# very similar to util.finddirs, except: | ||||
# - produces a (dirname, basename) tuple, not just 'dirname' | ||||
# - includes root dir | ||||
# Unlike manifest._splittopdir, this does not suffix `dirname` with a | ||||
# slash, and produces '.' for the root instead of ''. | ||||
oldpos = len(path) | ||||
pos = path.rfind('/') | ||||
while pos != -1: | ||||
yield path[:pos], path[pos + 1:oldpos] | ||||
oldpos = pos | ||||
pos = path.rfind('/', 0, pos) | ||||
yield '.', path[:oldpos] | ||||
def get(self, path): | ||||
return self._dirs.get(path, set()) | ||||
Martin von Zweigbergk
|
class includematcher(basematcher):
    '''Matcher built from ``kindpats`` whose patterns are anchored with
    '(?:/|$)', i.e. a pattern may end at a path separator as well as at the
    end of the name.
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', root)
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = set(parents)

    def _inrootsordirs(self, dir):
        # True when dir is a root or a dir, or lies below a root ('.' being a
        # root covers everything).
        roots = self._roots
        if '.' in roots or dir in roots or dir in self._dirs:
            return True
        return any(parentdir in roots
                   for parentdir in util.finddirs(dir))

    def visitdir(self, dir):
        if self._prefix and dir in self._roots:
            return 'all'
        return self._inrootsordirs(dir) or dir in self._parents

    @propertycache
    def _allparentschildren(self):
        # It may seem odd that we add dirs, roots, and parents, and then
        # restrict to only parents. This is to catch the case of:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # if we asked for the children of 'foo', but had only added
        # self._parents, we wouldn't be able to respond ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        if self._prefix and dir in self._roots:
            return 'all'
        # Note: this does *not* include the 'dir in self._parents' case from
        # visitdir, that's handled below.
        if self._inrootsordirs(dir):
            return 'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        pats = pycompat.bytestr(self._pats)
        return '<includematcher includes=%r>' % pats
Martin von Zweigbergk
|
r32406 | |||
Martin von Zweigbergk
|
class exactmatcher(basematcher):
    '''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # Reuse a list as given; copy anything else into a list.
        self._files = files if isinstance(files, list) else list(files)

    # A file matches exactly when it is one of the listed files.
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        return set(util.dirs(self._fileset)) | {'.'}

    def visitdir(self, dir):
        return dir in self._dirs

    def visitchildrenset(self, dir):
        if not self._fileset or dir not in self._dirs:
            return set()

        candidates = self._fileset | (self._dirs - {'.'})
        if dir != '.':
            prefix = dir + '/'
            skip = len(prefix)
            candidates = {c[skip:] for c in candidates
                          if c.startswith(prefix)}
        # self._dirs includes all of the directories, recursively, so if
        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
        # '/' in it, indicating a it's for a subdir-of-a-subdir; the
        # immediate subdir will be in there without a slash.
        children = set()
        for c in candidates:
            if '/' not in c:
                children.add(c)
        # We really do not expect children to be empty, since that would imply
        # that there's something in _dirs that didn't have a file in _fileset.
        assert children
        return children

    def isexact(self):
        return True

    @encoding.strmethod
    def __repr__(self):
        return '<exactmatcher files=%r>' % self._files
Martin von Zweigbergk
|
class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Callbacks always come from the first matcher.
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        # Ask m2 once; the answer is needed for both checks below.
        m2visit = self._m2.visitdir(dir)
        if m2visit == 'all':
            # Everything below dir is excluded.
            return False
        if not m2visit:
            # m2 does not match dir, we can return 'all' here if possible
            return self._m1.visitdir(dir)
        return bool(self._m1.visitdir(dir))

    def visitchildrenset(self, dir):
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == 'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in ['all', 'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return 'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset('.'):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2))
Martin von Zweigbergk
|
def intersectmatchers(m1, m2):
    '''Composes two matchers by matching if both of them match.

    The second matcher's non-matching-attributes (bad, explicitdir,
    traversedir) are ignored.

    If either argument is None, ``m1 or m2`` is returned. If one of the
    matchers reports always(), a copy of the other one is returned; when that
    other one is m2, the copy takes over m1's callbacks. Otherwise an
    intersectionmatcher is built.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        combined = copy.copy(m2)
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        for attr in ('bad', 'explicitdir', 'traversedir'):
            setattr(combined, attr, getattr(m1, attr))
        return combined

    if m2.always():
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)
class intersectionmatcher(basematcher):
    '''Matches a file only when both wrapped matchers match it.

    The callbacks (bad, explicitdir, traversedir) are taken from the first
    matcher.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.explicitdir = m1.explicitdir
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # It neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not always
            # files. For example, if intersecting a matcher "-I glob:foo.txt"
            # with matcher of "path:dir2", we don't want to remove "dir2" from
            # the set.
            return self._m1.files() + self._m2.files()

        # Filter the exact matcher's files through the other matcher.
        if self._m1.isexact():
            exactm, otherm = self._m1, self._m2
        else:
            exactm, otherm = self._m2, self._m1
        return [f for f in exactm.files() if otherm(f)]

    def matchfn(self, f):
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        visit1 = self._m1.visitdir(dir)
        if visit1 == 'all':
            return self._m2.visitdir(dir)
        if not visit1:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # 'all' on one side defers entirely to the other side.
        if m1_set == 'all':
            return m2_set
        if m2_set == 'all':
            return m1_set

        if 'this' in (m1_set, m2_set):
            return 'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        return self._m1.always() and self._m2.always()

    def isexact(self):
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return ('<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2))
Martin von Zweigbergk
|
class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(b'root', b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> bool(m2(b'a.txt'))
    False
    >>> bool(m2(b'b.txt'))
    True
    >>> bool(m2.matchfn(b'a.txt'))
    False
    >>> bool(m2.matchfn(b'b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the parent's files living below ``path`` and strip the
        # "<path>/" prefix from them.
        dirprefix = path + "/"
        striplen = len(dirprefix)
        self._files = [name[striplen:] for name in matcher._files
                       if name.startswith(dirprefix)]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = path in matcher._files

    def bad(self, f, msg):
        # Report against the parent matcher using the parent-relative path.
        self._matcher.bad(self._path + "/" + f, msg)

    def matchfn(self, f):
        # Some information is lost in the superclass's constructor, so we
        # can not accurately create the matching function for the subdirectory
        # from the inputs. Instead, we override matchfn() and visitdir() to
        # call the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + "/" + f)

    def visitdir(self, dir):
        parentdir = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitdir(parentdir)

    def visitchildrenset(self, dir):
        parentdir = self._path if dir == '.' else self._path + "/" + dir
        return self._matcher.visitchildrenset(parentdir)

    def always(self):
        return self._always

    def prefix(self):
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return ('<subdirmatcher path=%r, matcher=%r>' %
                (self._path, self._matcher))
Yuya Nishihara
|
class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, explicitdir, traversedir) are
    ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'root/d/e'), b'f', [b'../a.txt', b'b.txt'])
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> bool(m2(b'a.txt'),)
    False
    >>> bool(m2(b'd/e/a.txt'))
    True
    >>> bool(m2(b'd/e/b.txt'))
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError('prefix path must not be empty')
        self._path = path
        self._pathprefix = path + '/'
        self._matcher = matcher

    @propertycache
    def _files(self):
        prefix = self._pathprefix
        return [prefix + name for name in self._matcher._files]

    def matchfn(self, f):
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix):])
        return False

    @propertycache
    def _pathdirs(self):
        # Ancestors of the prefix path, plus the root directory itself.
        ancestors = set(util.finddirs(self._path))
        ancestors.add('.')
        return ancestors

    def visitdir(self, dir):
        if dir == self._path:
            return self._matcher.visitdir('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix):])
        # Outside the prefix: only directories leading to it are of interest.
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        if dir == self._path:
            return self._matcher.visitchildrenset('.')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix):])
        if dir not in self._pathdirs:
            return set()
        return 'this'

    def isexact(self):
        return self._matcher.isexact()

    def prefix(self):
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        return ('<prefixdirmatcher path=%r, matcher=%r>'
                % (pycompat.bytestr(self._path), self._matcher))
Gregory Szorc
|
class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    A file matches as soon as any of the wrapped matchers accepts it.

    The explicitdir and traversedir attributes are taken from the first
    matcher.
    NOTE(review): the previous docstring also listed ``bad`` as taken from
    the first matcher, but __init__ does not copy it - confirm the intent.
    """

    def __init__(self, matchers):
        first = matchers[0]
        super(unionmatcher, self).__init__()
        self.explicitdir = first.explicitdir
        self.traversedir = first.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        return any(m(f) for m in self._matchers)

    def visitdir(self, dir):
        result = False
        for m in self._matchers:
            verdict = m.visitdir(dir)
            if verdict == 'all':
                # Nothing can be broader than 'all'; stop right away.
                return verdict
            result |= verdict
        return result

    def visitchildrenset(self, dir):
        children = set()
        sawthis = False
        for m in self._matchers:
            verdict = m.visitchildrenset(dir)
            if not verdict:
                continue
            if verdict == 'all':
                return verdict
            if sawthis or verdict == 'this':
                sawthis = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(verdict, set)
            children.update(verdict)
        return 'this' if sawthis else children

    @encoding.strmethod
    def __repr__(self):
        return ('<unionmatcher matchers=%r>' % self._matchers)
Mads Kiilerich
|
def patkind(pattern, default=None):
    '''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise return ``default``. This is the first element of what
    _patsplit() returns for the same arguments.
    '''
    return _patsplit(pattern, default)[0]
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _patsplit(pattern, default): | ||
"""Split a string into the optional pattern kind prefix and the actual | ||||
pattern.""" | ||||
if ':' in pattern: | ||||
kind, pat = pattern.split(':', 1) | ||||
Kostia Balytskyi
|
r33647 | if kind in allpatternkinds: | ||
Mads Kiilerich
|
r21111 | return kind, pat | ||
return default, pattern | ||||
Matt Mackall
|
r8570 | |||
Matt Mackall
|
def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    The pattern is scanned one character at a time; characters without a
    glob meaning are escaped through stringutil.regexbytesescapemap.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    # i is the scan position, n the pattern length
    i, n = 0, len(pat)
    res = ''
    # nesting depth of currently open '{' alternation groups
    group = 0
    escape = util.stringutil.regexbytesescapemap.get
    def peek():
        # next unread character, or False when the pattern is exhausted
        return i < n and pat[i:i + 1]
    while i < n:
        # slicing (not indexing) keeps c a one-character string
        c = pat[i:i + 1]
        i += 1
        if c not in '*?[{},\\':
            # no glob meaning: emit it (escaped if needed)
            res += escape(c, c)
        elif c == '*':
            if peek() == '*':
                i += 1
                if peek() == '/':
                    # '**/' : optionally any number of leading directories
                    i += 1
                    res += '(?:.*/)?'
                else:
                    # '**' : anything, including '/'
                    res += '.*'
            else:
                # single '*' : anything except '/'
                res += '[^/]*'
        elif c == '?':
            res += '.'
        elif c == '[':
            # look for the closing ']' of the character class
            j = i
            if j < n and pat[j:j + 1] in '!]':
                # a leading '!' or ']' is part of the class, not its end
                j += 1
            while j < n and pat[j:j + 1] != ']':
                j += 1
            if j >= n:
                # unterminated class: treat '[' as a literal
                res += '\\['
            else:
                stuff = pat[i:j].replace('\\','\\\\')
                i = j + 1
                if stuff[0:1] == '!':
                    # glob negation '!' becomes regexp negation '^'
                    stuff = '^' + stuff[1:]
                elif stuff[0:1] == '^':
                    # a literal leading '^' must not negate the class
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
        elif c == '{':
            group += 1
            res += '(?:'
        elif c == '}' and group:
            res += ')'
            group -= 1
        elif c == ',' and group:
            # ',' separates alternatives only inside a '{...}' group
            res += '|'
        elif c == '\\':
            # backslash escapes the following character, if there is one
            p = peek()
            if p:
                i += 1
                res += escape(p, p)
            else:
                res += escape(c, c)
        else:
            # '}' or ',' outside of any group: plain character
            res += escape(c, c)
    return res
Matt Mackall
|
r8570 | |||
Mads Kiilerich
|
r21111 | def _regex(kind, pat, globsuffix): | ||
'''Convert a (normalized) pattern of any kind into a regular expression. | ||||
globsuffix is appended to the regexp of globs.''' | ||||
if not pat: | ||||
Matt Mackall
|
r8574 | return '' | ||
if kind == 're': | ||||
Mads Kiilerich
|
r21111 | return pat | ||
Martin von Zweigbergk
|
r33358 | if kind in ('path', 'relpath'): | ||
Matt Harbison
|
r25636 | if pat == '.': | ||
return '' | ||||
Augie Fackler
|
r38494 | return util.stringutil.reescape(pat) + '(?:/|$)' | ||
Rodrigo Damazio Bovendorp
|
r31012 | if kind == 'rootfilesin': | ||
if pat == '.': | ||||
escaped = '' | ||||
else: | ||||
# Pattern is a directory name. | ||||
Augie Fackler
|
r38494 | escaped = util.stringutil.reescape(pat) + '/' | ||
Rodrigo Damazio Bovendorp
|
r31012 | # Anything after the pattern must be a non-directory. | ||
Martin von Zweigbergk
|
r33357 | return escaped + '[^/]+$' | ||
Mads Kiilerich
|
r21111 | if kind == 'relglob': | ||
return '(?:|.*/)' + _globre(pat) + globsuffix | ||||
if kind == 'relre': | ||||
if pat.startswith('^'): | ||||
return pat | ||||
return '.*' + pat | ||||
Valentin Gatien-Baron
|
r41318 | if kind in ('glob', 'rootglob'): | ||
Yuya Nishihara
|
r38597 | return _globre(pat) + globsuffix | ||
raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat)) | ||||
Matt Mackall
|
r8574 | |||
Martin von Zweigbergk
|
def _buildmatch(kindpats, globsuffix, root):
    '''Return a (regexp string, matcher function) pair for kindpats.

    'subinclude' patterns are split off first and served by lazily created
    sub-matchers. The remaining patterns are handled either by a directory
    lookup (when all of them are 'rootfilesin') or by a compiled regexp.
    globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # prefix -> matcher, filled in on first use
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch
                if submatch(f[len(prefix):]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = ''
    if kindpats:
        if all(k == 'rootfilesin' for k, p, s in kindpats):
            dirs = set(p for k, p, s in kindpats)

            def matchrootfilesin(f):
                # Directory part of f, '.' for files at the root.
                head, slash, tail = f.rpartition('/')
                parent = head if slash else '.'
                return parent in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(matchrootfilesin)
        else:
            regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(regexmatch)

    if len(matchfuncs) == 1:
        return regex, matchfuncs[0]
    return regex, lambda f: any(fn(f) for fn in matchfuncs)
Matt Mackall
|
r14675 | |||
Boris Feld
|
# Size limit used by _buildregexmatch(): a single pattern whose regexp is
# longer than this aborts, and the combined regexp is split into several
# groups so that no group grows beyond it.
MAX_RE_SIZE = 20000
Boris Feld
|
def _joinregexes(regexps):
    """gather multiple regular expressions into a single one

    The expressions are joined with '|', so the result matches whenever any
    of them does."""
    return '|'.join(regexps)
Boris Feld
|
r40812 | |||
Mads Kiilerich
|
def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    The returned regexp string is always the full '|'-joined expression.
    When it is too large, the returned function instead tries several
    smaller compiled groups in turn.

    Raises error.Abort when a single pattern is longer than MAX_RE_SIZE or
    when a pattern is not a valid regular expression.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        allgroups = []
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # startidx: index of the first regexp of the group being filled
        # groupsize: size of that group so far, separators included
        startidx = 0
        groupsize = 0
        for idx, r in enumerate(regexps):
            piecesize = len(r)
            if piecesize > MAX_RE_SIZE:
                # a single piece that is too big cannot be split any further
                msg = _("matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            elif (groupsize + piecesize) > MAX_RE_SIZE:
                # adding this piece would overflow: close the current group
                # and start a new one with this piece
                group = regexps[startidx:idx]
                allgroups.append(_joinregexes(group))
                startidx = idx
                groupsize = 0
            # + 1 accounts for the '|' separator
            groupsize += piecesize + 1

        if startidx == 0:
            # everything fit in one group: compile the full regexp directly
            func = _rematcher(fullregexp)
        else:
            # flush the last, still open group
            group = regexps[startidx:]
            allgroups.append(_joinregexes(group))
            allmatchers = [_rematcher(g) for g in allgroups]
            func = lambda s: any(m(s) for m in allmatchers)
        return fullregexp, func
    except re.error:
        # Compile every pattern on its own to report which one is invalid.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    raise error.Abort(_("%s: invalid pattern (%s): %s") %
                                     (s, k, p))
                else:
                    raise error.Abort(_("invalid pattern (%s): %s") % (k, p))
        # each pattern compiles alone, only their combination is invalid
        raise error.Abort(_("invalid pattern"))
Matt Mackall
|
r8574 | |||
Rodrigo Damazio Bovendorp
|
r31013 | def _patternrootsanddirs(kindpats): | ||
'''Returns roots and directories corresponding to each pattern. | ||||
Mads Kiilerich
|
r21079 | |||
Rodrigo Damazio Bovendorp
|
r31013 | This calculates the roots and directories exactly matching the patterns and | ||
returns a tuple of (roots, dirs) for each. It does not return other | ||||
directories which may also need to be considered, like the parent | ||||
directories. | ||||
Mads Kiilerich
|
r21079 | ''' | ||
Matt Mackall
|
r8576 | r = [] | ||
Rodrigo Damazio Bovendorp
|
r31013 | d = [] | ||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Valentin Gatien-Baron
|
r41318 | if kind in ('glob', 'rootglob'): # find the non-glob prefix | ||
Matt Mackall
|
r8584 | root = [] | ||
Mads Kiilerich
|
r21111 | for p in pat.split('/'): | ||
Matt Mackall
|
r8584 | if '[' in p or '{' in p or '*' in p or '?' in p: | ||
break | ||||
root.append(p) | ||||
r.append('/'.join(root) or '.') | ||||
Rodrigo Damazio Bovendorp
|
r31013 | elif kind in ('relpath', 'path'): | ||
Mads Kiilerich
|
r21111 | r.append(pat or '.') | ||
Rodrigo Damazio Bovendorp
|
r31013 | elif kind in ('rootfilesin',): | ||
d.append(pat or '.') | ||||
Mads Kiilerich
|
r19107 | else: # relglob, re, relre | ||
Matt Mackall
|
r8576 | r.append('.') | ||
Rodrigo Damazio Bovendorp
|
r31013 | return r, d | ||
def _roots(kindpats):
    '''Return the directories the given patterns must be matched under
    recursively, i.e. the ``roots`` half of _patternrootsanddirs().'''
    return _patternrootsanddirs(kindpats)[0]
spectral
|
def _rootsdirsandparents(kindpats):
    '''Returns roots, exact directories and their parents from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    (['g/h', 'g/h', '.'], [], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    ([], ['g/h', '.'], ['g', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    (['r', 'p/p', '.'], [], ['p', '.'])
    >>> _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    (['.', '.', '.'], [], ['.'])
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # The parents are non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = list(util.dirs(dirs))
    parents.extend(util.dirs(roots))
    # util.dirs() does not include the root directory, so add it manually
    parents.append('.')

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents
Matt Mackall
|
r8576 | |||
Rodrigo Damazio Bovendorp
|
def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    'rootfilesin' patterns are left out, as they can only name directories.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    canbefile = []
    for kindpat in kindpats:
        if kindpat[0] != 'rootfilesin':
            canbefile.append(kindpat)
    return _roots(canbefile)
Martin von Zweigbergk
|
r33405 | def _prefix(kindpats): | ||
'''Whether all the patterns match a prefix (i.e. recursively)''' | ||||
Durham Goode
|
r25213 | for kind, pat, source in kindpats: | ||
Martin von Zweigbergk
|
r33405 | if kind not in ('path', 'relpath'): | ||
return False | ||||
return True | ||||
Durham Goode
|
r25167 | |||
# Regexp used by readpatternfile() to strip '#' comments; compiled lazily on
# first use and then kept for later calls.
_commentre = None
Laurent Charignon
|
def readpatternfile(filepath, warn, sourceinfo=False):
    '''parse a pattern file, returning a list of
    patterns. These patterns should be given to compile()
    to be validated and converted into a match function.

    trailing white space is dropped.
    the escape character is backslash.
    comments start with #.
    empty lines are skipped.

    lines can be of the following formats:

    syntax: regexp # defaults following lines to non-rooted regexps
    syntax: glob   # defaults following lines to non-rooted globs
    re:pattern     # non-rooted regular expression
    glob:pattern   # non-rooted glob
    rootglob:pat   # rooted glob (same root as ^ in regexps)
    pattern        # pattern of the current default type

    filepath is opened in binary mode. warn, when true, is called with a
    message for every "syntax:" line naming an unknown syntax; such lines
    are otherwise ignored.

    if sourceinfo is set, returns a list of tuples:
    (pattern, lineno, originalline). This is useful to debug ignore patterns.
    originalline is the line with its comment, trailing white space and
    syntax prefix removed.
    '''
    global _commentre

    syntaxes = {
        're': 'relre:',
        'regexp': 'relre:',
        'glob': 'relglob:',
        'rootglob': 'rootglob:',
        'include': 'include',
        'subinclude': 'subinclude',
    }
    syntax = 'relre:'
    patterns = []

    # The context manager closes the file even when parsing or the warn
    # callback raises, instead of leaking the handle.
    with open(filepath, 'rb') as fp:
        for lineno, line in enumerate(util.iterfile(fp), start=1):
            if "#" in line:
                if not _commentre:
                    _commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')
                # remove comments prefixed by an even number of escapes
                m = _commentre.search(line)
                if m:
                    line = line[:m.end(1)]
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            if line.startswith('syntax:'):
                # switch the default syntax for the following lines
                s = line[7:].strip()
                try:
                    syntax = syntaxes[s]
                except KeyError:
                    if warn:
                        warn(_("%s: ignoring invalid syntax '%s'\n") %
                             (filepath, s))
                continue

            # An explicit per-line prefix overrides the current default.
            linesyntax = syntax
            for s, rels in syntaxes.iteritems():
                if line.startswith(rels):
                    linesyntax = rels
                    line = line[len(rels):]
                    break
                elif line.startswith(s+':'):
                    linesyntax = rels
                    line = line[len(s) + 1:]
                    break
            if sourceinfo:
                patterns.append((linesyntax + line, lineno, line))
            else:
                patterns.append(linesyntax + line)
    return patterns