fileset.py
506 lines
| 14.6 KiB
| text/x-python
|
PythonLexer
/ mercurial / fileset.py
Matt Mackall
|
r14511 | # fileset.py - file set queries for mercurial | ||
# | ||||
# Copyright 2010 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Matt Mackall
|
r14682 | import parser, error, util, merge, re | ||
Matt Mackall
|
r14511 | from i18n import _ | ||
elements = { | ||||
"(": (20, ("group", 1, ")"), ("func", 1, ")")), | ||||
"-": (5, ("negate", 19), ("minus", 5)), | ||||
"not": (10, ("not", 10)), | ||||
"!": (10, ("not", 10)), | ||||
"and": (5, None, ("and", 5)), | ||||
"&": (5, None, ("and", 5)), | ||||
"or": (4, None, ("or", 4)), | ||||
"|": (4, None, ("or", 4)), | ||||
"+": (4, None, ("or", 4)), | ||||
",": (2, None, ("list", 2)), | ||||
")": (0, None, None), | ||||
"symbol": (0, ("symbol",), None), | ||||
"string": (0, ("string",), None), | ||||
"end": (0, None, None), | ||||
} | ||||
keywords = set(['and', 'or', 'not']) | ||||
Matt Mackall
|
r19470 | globchars = ".*{}[]?/\\_" | ||
Matt Mackall
|
r14551 | |||
Matt Mackall
|
def tokenize(program):
    """Split a fileset expression into tokens.

    Yields ``(type, value, position)`` tuples: operators and keywords
    carry ``None`` as value, strings and symbols carry their text, and a
    final ``('end', None, pos)`` token closes the stream.

    Raises error.ParseError on an unterminated string or on a character
    that cannot start any token.
    """
    def symbolchar(ch):
        # characters allowed in an unquoted symbol
        return ch.isalnum() or ch in globchars or ord(ch) > 127

    pos, end = 0, len(program)
    while pos < end:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step past the prefix and keep text verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            start = pos
            while pos < end: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                elif d == c:
                    yield ('string', decode(program[start:pos]), start)
                    break
                else:
                    pos += 1
            else:
                raise error.ParseError(_("unterminated string"), start)
        elif symbolchar(c):
            # gather up a symbol/keyword
            start = pos
            pos += 1
            while pos < end and symbolchar(program[pos]):
                pos += 1
            sym = program[start:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, start)
            else:
                yield ('symbol', sym, start)
            # undo the overshoot; the shared increment below re-applies it
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
Yuya Nishihara
|
def parse(expr):
    """Parse a fileset expression with the module's tokenizer and
    operator table, returning whatever parser.parse() returns."""
    return parser.parser(tokenize, elements).parse(expr)
Matt Mackall
|
r14511 | |||
Matt Mackall
|
def getstring(x, err):
    """Return the text of a 'string' or 'symbol' node.

    Raises error.ParseError(err) when x is empty or of any other type.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise error.ParseError(err)
def getset(mctx, x):
    """Evaluate parse-tree node x by dispatching on its type through the
    ``methods`` table; the rest of the node is passed as arguments.

    Raises error.ParseError when the node is empty.
    """
    if not x:
        raise error.ParseError(_("missing argument"))
    handler = methods[x[0]]
    return handler(mctx, *x[1:])
def stringset(mctx, x):
    """Return the files of mctx.subset accepted by a matcher built from
    the single pattern x."""
    accepts = mctx.matcher([x])
    return [name for name in mctx.subset if accepts(name)]
def andset(mctx, x, y):
    """Evaluate y inside the context narrowed to the files matched by x."""
    left = getset(mctx, x)
    return getset(mctx.narrow(left), y)
def orset(mctx, x, y):
    """Return the files matched by x, followed by the files matched by y
    that x did not already match (order of both operands is kept)."""
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # set lookup keeps the duplicate check linear instead of quadratic
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]
def notset(mctx, x):
    """Return the files of mctx.subset that x does not match."""
    excluded = set(getset(mctx, x))
    return [name for name in mctx.subset if name not in excluded]
Patrick Mezard
|
def minusset(mctx, x, y):
    """Return the files matched by x that are not matched by y."""
    kept = getset(mctx, x)
    dropped = set(getset(mctx, y))
    return [name for name in kept if name not in dropped]
Matt Mackall
|
def listset(mctx, a, b):
    """Reject a 'list' node evaluated as a set: always raises
    error.ParseError."""
    raise error.ParseError(_("can't use a list in this context"))
Matt Mackall
|
def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    # i18n: "modified" is a keyword
    getargs(x, 0, 0, _("modified takes no arguments"))
    # slot 0 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[0])
    return [f for f in mctx.subset if f in s]
def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    # i18n: "added" is a keyword
    getargs(x, 0, 0, _("added takes no arguments"))
    # slot 1 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[1])
    return [f for f in mctx.subset if f in s]
def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    # i18n: "removed" is a keyword
    getargs(x, 0, 0, _("removed takes no arguments"))
    # slot 2 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[2])
    return [f for f in mctx.subset if f in s]
def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    # i18n: "deleted" is a keyword
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # slot 3 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[3])
    return [f for f in mctx.subset if f in s]
def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    # i18n: "unknown" is a keyword
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # slot 4 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[4])
    return [f for f in mctx.subset if f in s]
def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    # i18n: "ignored" is a keyword
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # slot 5 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[5])
    return [f for f in mctx.subset if f in s]
def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    # i18n: "clean" is a keyword
    getargs(x, 0, 0, _("clean takes no arguments"))
    # slot 6 of the status result; a set keeps the membership test cheap
    s = set(mctx.status()[6])
    return [f for f in mctx.subset if f in s]
Matt Mackall
|
def func(mctx, a, b):
    """Call the predicate named by symbol node a with argument tree b.

    Raises error.ParseError when a is not a symbol registered in
    ``symbols``.
    """
    if a[0] != 'symbol' or a[1] not in symbols:
        raise error.ParseError(_("not a function: %s") % a[1])
    predicate = symbols[a[1]]
    return predicate(mctx, b)
def getlist(x):
    """Flatten a left-nested chain of 'list' nodes into a Python list.

    An empty node gives ``[]``; a non-list node gives ``[x]``.
    """
    items = []
    # peel the right-hand element off each nested 'list' node
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    # elements were collected last-to-first
    items.reverse()
    return items
def getargs(x, min, max, err):
    """Return the argument list of x, checking that it holds between
    min and max entries (inclusive).

    Raises error.ParseError(err) when the count is out of range.
    """
    args = getlist(x)
    if not (min <= len(args) <= max):
        raise error.ParseError(err)
    return args
def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # i18n: "binary" is a keyword
    getargs(x, 0, 0, _("binary takes no arguments"))
    matched = []
    for f in mctx.existing():
        # the decision is delegated to util.binary() on the file data
        if util.binary(mctx.ctx[f].data()):
            matched.append(f)
    return matched
Matt Mackall
|
r14676 | |||
def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    # i18n: "exec" is a keyword
    getargs(x, 0, 0, _("exec takes no arguments"))
    matched = []
    for f in mctx.existing():
        # only files whose flags are exactly 'x' qualify
        if mctx.ctx.flags(f) == 'x':
            matched.append(f)
    return matched
Matt Mackall
|
r14676 | |||
def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    # i18n: "symlink" is a keyword
    getargs(x, 0, 0, _("symlink takes no arguments"))
    matched = []
    for f in mctx.existing():
        # only files whose flags are exactly 'l' qualify
        if mctx.ctx.flags(f) == 'l':
            matched.append(f)
    return matched
Matt Mackall
|
r14676 | |||
Matt Mackall
|
def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    # i18n: "resolved" is a keyword
    getargs(x, 0, 0, _("resolved takes no arguments"))
    ctx = mctx.ctx
    # merge state is only consulted when the context has no revision number
    if ctx.rev() is not None:
        return []
    mergestate = merge.mergestate(ctx._repo)
    return [name for name in mctx.subset
            if name in mergestate and mergestate[name] == 'r']
def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    # i18n: "unresolved" is a keyword
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    ctx = mctx.ctx
    # merge state is only consulted when the context has no revision number
    if ctx.rev() is not None:
        return []
    mergestate = merge.mergestate(ctx._repo)
    return [name for name in mctx.subset
            if name in mergestate and mergestate[name] == 'u']
Matt Mackall
|
def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    # i18n: "hgignore" is a keyword
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # the dirstate's ignore matcher decides, independently of status
    ignore = mctx.ctx._repo.dirstate._ignore
    return [f for f in mctx.subset if ignore(f)]
Matt Mackall
|
r14682 | def grep(mctx, x): | ||
"""``grep(regex)`` | ||||
File contains the given regular expression. | ||||
""" | ||||
Patrick Mezard
|
r17368 | try: | ||
# i18n: "grep" is a keyword | ||||
r = re.compile(getstring(x, _("grep requires a pattern"))) | ||||
except re.error, e: | ||||
raise error.ParseError(_('invalid match pattern: %s') % e) | ||||
Matt Mackall
|
r15963 | return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())] | ||
Matt Mackall
|
r14682 | |||
Matt Mackall
|
def _sizetomax(s):
    """Return the upper bound of the byte range denoted by size string s.

    A value with a unit suffix from util._sizeunits is bumped by one step
    of its last written digit and decremented by one byte; a value without
    a suffix is returned as an exact integer.

    Raises error.ParseError when the number cannot be parsed.
    """
    try:
        s = s.strip()
        for suffix, multiplier in util._sizeunits:
            if not s.endswith(suffix):
                continue
            # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
            number = s[:-len(suffix)]
            step = 1.0
            if "." in number:
                # one unit of the last decimal place that was written
                step /= 10 ** len(number.split(".")[1])
            return int((float(number) + step) * multiplier) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
Matt Mackall
|
r14683 | |||
def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MB (files at least 524288 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """

    # i18n: "size" is a keyword
    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = util.sizetoint(a)
        b = util.sizetoint(b)
        m = lambda x: x >= a and x <= b
    elif expr.startswith("<="):
        a = util.sizetoint(expr[2:])
        m = lambda x: x <= a
    elif expr.startswith("<"):
        a = util.sizetoint(expr[1:])
        m = lambda x: x < a
    elif expr.startswith(">="):
        a = util.sizetoint(expr[2:])
        m = lambda x: x >= a
    elif expr.startswith(">"):
        a = util.sizetoint(expr[1:])
        m = lambda x: x > a
    # isdigit must be *called*; slicing keeps an empty expression from
    # raising IndexError so that it reaches the ParseError below
    elif expr[:1].isdigit() or expr.startswith('.'):
        # bare size: match the whole range that rounds to this value
        a = util.sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda x: x >= a and x <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)

    return [f for f in mctx.existing() if m(mctx.ctx[f].size())]
Matt Mackall
|
r14683 | |||
Matt Mackall
|
def encoding(mctx, x):
    """``encoding(name)``
    File can be successfully decoded with the given character
    encoding. May not be useful for encodings other than ASCII and
    UTF-8.
    """

    # i18n: "encoding" is a keyword
    enc = getstring(x, _("encoding requires an encoding name"))

    def decodes(data):
        # True when data decodes cleanly; an unknown codec aborts outright
        try:
            data.decode(enc)
        except LookupError:
            raise util.Abort(_("unknown encoding '%s'") % enc)
        except UnicodeDecodeError:
            return False
        return True

    return [f for f in mctx.existing() if decodes(mctx.ctx[f].data())]
Matt Mackall
|
def eol(mctx, x):
    """``eol(style)``
    File contains newlines of the given style (dos, unix, mac). Binary
    files are excluded, files with mixed line endings match multiple
    styles.
    """

    # i18n: "eol" is a keyword
    style = getstring(x, _("eol requires a style name"))

    s = []
    for f in mctx.existing():
        d = mctx.ctx[f].data()
        if util.binary(d):
            continue
        # 'win' is accepted as an alias of 'dos'
        if (style == 'dos' or style == 'win') and '\r\n' in d:
            s.append(f)
        # a newline not preceded by a carriage return
        elif style == 'unix' and re.search('(?<!\r)\n', d):
            s.append(f)
        # a carriage return not followed by a newline
        elif style == 'mac' and re.search('\r(?!\n)', d):
            s.append(f)
    return s
Matt Mackall
|
def copied(mctx, x):
    """``copied()``
    File that is recorded as being copied.
    """
    # i18n: "copied" is a keyword
    getargs(x, 0, 0, _("copied takes no arguments"))
    ctx = mctx.ctx

    def iscopy(f):
        # a file counts as copied when its first parent has another path
        parents = ctx[f].parents()
        return parents and parents[0].path() != f

    return [f for f in mctx.subset if iscopy(f)]
Angel Ezquerra
|
def subrepo(mctx, x):
    """``subrepo([pattern])``
    Subrepositories whose paths match the given pattern.
    """
    # i18n: "subrepo" is a keyword
    getargs(x, 0, 1, _("subrepo takes at most one argument"))
    ctx = mctx.ctx
    sstate = sorted(ctx.substate)
    if not x:
        # no pattern: every subrepository matches
        return list(sstate)

    pat = getstring(x, _("subrepo requires a pattern or no arguments"))
    import match as matchmod # avoid circular import issues
    if matchmod.patkind(pat):
        m = matchmod.match(ctx._repo.root, '', [pat], ctx=ctx)
    else:
        # pattern without a kind prefix: plain equality is enough
        def m(s):
            return (s == pat)
    return [sub for sub in sstate if m(sub)]
Matt Mackall
|
# predicate name (as written in a fileset expression) -> implementation;
# func() looks callable names up here
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'copied': copied,
    'deleted': deleted,
    'encoding': encoding,
    'eol': eol,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
    'subrepo': subrepo,
}
Matt Mackall
|
# parse-tree node type -> evaluator; getset() dispatches through this table
methods = {
    # leaves: both are treated as file patterns
    'string': stringset,
    'symbol': stringset,
    # set operators
    'and': andset,
    'or': orset,
    'minus': minusset,
    'not': notset,
    # structure
    'list': listset,
    'group': getset,
    'func': func,
}
class matchctx(object):
    """Evaluation context handed to every fileset predicate.

    Bundles a change context, the candidate file list predicates may
    select from, and an optional precomputed status result.
    """
    def __init__(self, ctx, subset=None, status=None):
        # ctx: change context queried for file membership, data and flags
        self.ctx = ctx
        # subset: candidate file names predicates filter
        self.subset = subset
        # _status: status result indexed positionally, or None
        self._status = status
    def status(self):
        """Return the status result given at construction (may be None)."""
        return self._status
    def matcher(self, patterns):
        """Return ctx.match(patterns)."""
        return self.ctx.match(patterns)
    def filter(self, files):
        """Return the entries of files that are in the subset, keeping
        the order of files. A missing subset is treated as empty."""
        # build the lookup once so the test is not a list scan per file
        members = set(self.subset or ())
        return [f for f in files if f in members]
    def existing(self):
        """Iterate over subset files that are in ctx and not listed in
        status slot 3, plus any file listed in status slots 4 or 5."""
        if self._status is not None:
            removed = set(self._status[3])
            unknown = set(self._status[4] + self._status[5])
        else:
            removed = set()
            unknown = set()
        return (f for f in (self.subset or ())
                if (f in self.ctx and f not in removed) or f in unknown)
    def narrow(self, files):
        """Return a new matchctx whose subset is filter(files)."""
        return matchctx(self.ctx, self.filter(files), self._status)
Matt Mackall
|
r14551 | |||
Matt Mackall
|
r14678 | def _intree(funcs, tree): | ||
if isinstance(tree, tuple): | ||||
if tree[0] == 'func' and tree[1][0] == 'symbol': | ||||
if tree[1][1] in funcs: | ||||
return True | ||||
for s in tree[1:]: | ||||
if _intree(funcs, s): | ||||
return True | ||||
return False | ||||
Patrick Mezard
|
r17365 | # filesets using matchctx.existing() | ||
_existingcallers = [ | ||||
'binary', | ||||
'exec', | ||||
'grep', | ||||
'size', | ||||
'symlink', | ||||
] | ||||
Matt Mackall
|
def getfileset(ctx, expr):
    """Evaluate fileset expression expr against ctx and return the
    result of evaluating its parse tree.

    Raises error.ParseError when the parser stops before the end of expr.
    """
    tree, pos = parse(expr)
    if (pos != len(expr)):
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    statusfuncs = ['modified', 'added', 'removed', 'deleted',
                   'unknown', 'ignored', 'clean']
    needstatus = _intree(statusfuncs, tree)
    if not needstatus and ctx.rev() is None:
        # Using matchctx.existing() on a workingctx requires us to check
        # for deleted files.
        needstatus = _intree(_existingcallers, tree)

    if not needstatus:
        files = list(ctx.walk(ctx.match([])))
        return getset(matchctx(ctx, files, None), tree)

    # unknown/ignored files are only requested when asked for explicitly
    wantunknown = _intree(['unknown'], tree)
    wantignored = _intree(['ignored'], tree)
    repo = ctx._repo
    status = repo.status(ctx.p1(), ctx,
                         unknown=wantunknown, ignored=wantignored,
                         clean=True)
    # the candidate set is every file mentioned in any status slot
    files = []
    for group in status:
        files.extend(group)
    return getset(matchctx(ctx, files, status), tree)
Matt Mackall
|
r14681 | |||
# hggettext extracts the docstrings of the functions listed here:
i18nfunctions = symbols.values()