fileset.py
426 lines
| 12.0 KiB
| text/x-python
|
PythonLexer
/ mercurial / fileset.py
Matt Mackall
|
r14511 | # fileset.py - file set queries for mercurial | ||
# | ||||
# Copyright 2010 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Matt Mackall
|
r14682 | import parser, error, util, merge, re | ||
Matt Mackall
|
r14511 | from i18n import _ | ||
# Operator table handed to parser.parser() together with tokenize (see the
# `parse` assignment below), keyed by token type.
# NOTE(review): each value looks like (binding strength, prefix rule,
# infix rule) -- confirm against parser.py, which is not visible here.
# NOTE(review): "-" is mapped to 'negate'/'minus' nodes, but the `methods`
# dispatch table in this file has no entry for either name; if the parser
# really emits those nodes, getset() would fail with a KeyError -- verify.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# Symbols that tokenize() emits as operator tokens instead of 'symbol'.
keywords = set(['and', 'or', 'not'])
Matt Mackall
|
# Non-alphanumeric characters that tokenize() accepts inside an unquoted
# symbol, so glob-style patterns and paths form a single 'symbol' token.
globchars = ".*{}[]?/\\"
Matt Mackall
|
def tokenize(program):
    """Generate (type, value, position) tokens for a fileset expression.

    Token types are the single-character operators in "(),-|&+!", the
    words listed in ``keywords``, 'string', 'symbol', and a final 'end'
    token.  Operators, keywords and 'end' carry None as their value.

    Raises error.ParseError on an unterminated string or on a character
    that cannot start any token.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step over the 'r' prefix and keep the
                # contents verbatim
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                # regular string: backslash escapes are interpreted
                decode = lambda x: x.decode('string-escape')
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    # pos stays on the closing quote; the shared
                    # "pos += 1" at the bottom of the outer loop steps
                    # past it
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # ran off the end of the input without seeing the quote
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in globchars or ord(c) > 127:
            # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in globchars or ord(d) > 127):
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            # pos is already one past the symbol; undo the shared
            # increment below
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
# Module-level parse entry point built from the tokenizer and operator
# table above; getfileset() unpacks its result as (tree, pos).
parse = parser.parser(tokenize, elements).parse
Matt Mackall
|
def getstring(x, err):
    """Return the text carried by a 'string' or 'symbol' parse node.

    x is a parse node (a tuple whose first item is the node type) or a
    false value; err is the message for the error.ParseError raised when
    x is missing or is any other kind of node.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
def getset(mctx, x):
    """Evaluate parse node x against mctx and return the result.

    The node type x[0] selects the evaluator from ``methods``; the
    remaining items of the node are passed to it as positional arguments.
    Raises error.ParseError when x is empty or None.
    """
    if x:
        evaluator = methods[x[0]]
        operands = x[1:]
        return evaluator(mctx, *operands)
    raise error.ParseError(_("missing argument"))
def stringset(mctx, x):
    """Return the files of mctx.subset accepted by a matcher for pattern x."""
    accepts = mctx.matcher([x])
    selected = []
    for name in mctx.subset:
        if accepts(name):
            selected.append(name)
    return selected
def andset(mctx, x, y):
    """Evaluate y within the result of x (set intersection)."""
    left = getset(mctx, x)
    # restrict the context to what x matched before evaluating y
    narrowed = mctx.narrow(left)
    return getset(narrowed, y)
def orset(mctx, x, y):
    """Return the union of the sets produced by x and y.

    The result keeps every entry of x in order, followed by the entries
    of y that x did not already contain.
    """
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # membership against a set instead of rescanning the xl list for every
    # entry of yl (same approach notset() uses)
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]
def notset(mctx, x):
    """Return the files of mctx.subset that x does not select."""
    excluded = set(getset(mctx, x))
    remaining = []
    for name in mctx.subset:
        if name not in excluded:
            remaining.append(name)
    return remaining
def listset(mctx, a, b):
    """Evaluator for 'list' nodes: always raises error.ParseError.

    A 'list' node is only meaningful as function arguments (getlist()
    unpacks those); reaching it through getset() means a list was used
    where a set was expected.
    """
    raise error.ParseError(_("can't use a list in this context"))
Matt Mackall
|
def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    getargs(x, 0, 0, _("modified takes no arguments"))
    # this predicate matches against entry 0 of the status result
    inlist = mctx.status()[0]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    getargs(x, 0, 0, _("added takes no arguments"))
    # this predicate matches against entry 1 of the status result
    inlist = mctx.status()[1]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    getargs(x, 0, 0, _("removed takes no arguments"))
    # this predicate matches against entry 2 of the status result
    inlist = mctx.status()[2]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # this predicate matches against entry 3 of the status result
    inlist = mctx.status()[3]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # this predicate matches against entry 4 of the status result
    inlist = mctx.status()[4]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # this predicate matches against entry 5 of the status result
    inlist = mctx.status()[5]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    getargs(x, 0, 0, _("clean takes no arguments"))
    # this predicate matches against entry 6 of the status result
    inlist = mctx.status()[6]
    hits = []
    for name in mctx.subset:
        if name in inlist:
            hits.append(name)
    return hits
Matt Mackall
|
def func(mctx, a, b):
    """Evaluate a 'func' node: call the predicate named by node a.

    a must be a 'symbol' node whose text is a key of ``symbols``; the
    predicate is called with mctx and the unevaluated argument tree b.
    Anything else raises error.ParseError.
    """
    if a[0] != 'symbol' or a[1] not in symbols:
        raise error.ParseError(_("not a function: %s") % a[1])
    return symbols[a[1]](mctx, b)
def getlist(x):
    """Flatten a left-nested chain of 'list' nodes into a Python list.

    An empty/None tree gives [], a single non-list node gives [node], and
    ('list', rest, last) gives the flattened rest followed by last.
    """
    items = []
    node = x
    # walk down the left spine, collecting right-hand items last-to-first
    while node and node[0] == 'list':
        items.append(node[2])
        node = node[1]
    if node:
        items.append(node)
    items.reverse()
    return items
def getargs(x, min, max, err):
    """Return the argument list of tree x, checking how many there are.

    Raises error.ParseError(err) unless the number of arguments lies
    between min and max inclusive.
    """
    args = getlist(x)
    count = len(args)
    if min <= count <= max:
        return args
    raise error.ParseError(err)
def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    # the docstring above is extracted by hggettext (see i18nfunctions),
    # so the "contails" typo it used to carry was a user-visible defect
    getargs(x, 0, 0, _("binary takes no arguments"))
    return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]
def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    getargs(x, 0, 0, _("exec takes no arguments"))
    executables = []
    for name in mctx.subset:
        # a file counts only when its flags string is exactly 'x'
        if mctx.ctx.flags(name) == 'x':
            executables.append(name)
    return executables
def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    getargs(x, 0, 0, _("symlink takes no arguments"))
    links = []
    for name in mctx.subset:
        # a file counts only when its flags string is exactly 'l'
        if mctx.ctx.flags(name) == 'l':
            links.append(name)
    return links
Matt Mackall
|
def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    getargs(x, 0, 0, _("resolved takes no arguments"))
    # only a context whose rev() is None is checked against merge state
    if mctx.ctx.rev() is None:
        mergestate = merge.mergestate(mctx.ctx._repo)
        found = []
        for name in mctx.subset:
            if name in mergestate and mergestate[name] == 'r':
                found.append(name)
        return found
    return []
def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    # only a context whose rev() is None is checked against merge state
    if mctx.ctx.rev() is None:
        mergestate = merge.mergestate(mctx.ctx._repo)
        found = []
        for name in mctx.subset:
            if name in mergestate and mergestate[name] == 'u':
                found.append(name)
        return found
    return []
Matt Mackall
|
def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # reuse the dirstate's ignore predicate of the context's repository
    isignored = mctx.ctx._repo.dirstate._ignore
    matched = []
    for name in mctx.subset:
        if isignored(name):
            matched.append(name)
    return matched
Matt Mackall
|
def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    pat = getstring(x, _("grep requires a pattern"))
    regex = re.compile(pat)
    matched = []
    for name in mctx.subset:
        # search the file's full contents as one string
        if regex.search(mctx.ctx[name].data()):
            matched.append(name)
    return matched
Matt Mackall
|
# Size suffixes understood by _sizetoint()/_sizetomax(), mapped to their
# multiplier in bytes (powers of two: k = 2**10, M = 2**20, G = 2**30).
_units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
              M=2**20, MB=2**20, G=2**30, GB=2**30)
Matt Mackall
|
r14683 | |||
def _sizetoint(s):
    """Convert a size such as "4k", ".5MB" or "1024" to a byte count.

    Surrounding whitespace is ignored.  A trailing suffix from ``_units``
    scales the (possibly fractional) number before it; without a suffix
    the text must be a plain integer.

    Raises error.ParseError when the number cannot be parsed.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                return int(float(s[:-len(k)]) * v)
        return int(s)
    except ValueError:
        # A bare "raise" used to sit here, re-raising the ValueError and
        # leaving the ParseError unreachable.  The offending text goes in
        # the message: elsewhere in this file the second ParseError
        # argument is a position, not a string.
        raise error.ParseError(_("couldn't parse size: %s") % s)
def _sizetomax(s):
    """Return the largest byte count that still reads as size s.

    For a value with a unit suffix the upper bound is one byte below the
    next step at the precision that was written; a bare integer is taken
    as an exact value.

    Raises error.ParseError when the number cannot be parsed.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
                n = s[:-len(k)]
                inc = 1.0
                if "." in n:
                    # one step in the last decimal place that was given
                    inc /= 10 ** len(n.split(".")[1])
                return int((float(n) + inc) * v) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        # A bare "raise" used to sit here, re-raising the ValueError and
        # leaving the ParseError unreachable.  The offending text goes in
        # the message: elsewhere in this file the second ParseError
        # argument is a position, not a string.
        raise error.ParseError(_("couldn't parse size: %s") % s)
def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:

    - 1k (files from 1024 to 2047 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MB (files at least 524288 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """
    # the message used to be grep's ("grep requires a pattern")
    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr: # do we have a range?
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda x: x >= a and x <= b
    elif expr.startswith("<="): # must be tested before the bare "<"
        a = _sizetoint(expr[2:])
        m = lambda x: x <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda x: x < a
    elif expr.startswith(">="): # must be tested before the bare ">"
        a = _sizetoint(expr[2:])
        m = lambda x: x >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda x: x > a
    elif expr and (expr[0].isdigit() or expr[0] == '.'):
        # isdigit is now actually called: the bare method object was
        # always true, which made the else branch below unreachable.  The
        # "expr and" guard keeps an empty expression from raising
        # IndexError.
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda x: x >= a and x <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)
    return [f for f in mctx.subset if m(mctx.ctx[f].size())]
Matt Mackall
|
def encoding(mctx, x):
    """``encoding(name)``
    File can be successfully decoded with the given character
    encoding. May not be useful for encodings other than ASCII and
    UTF-8.
    """
    enc = getstring(x, _("encoding requires an encoding name"))
    decodable = []
    for name in mctx.subset:
        data = mctx.ctx[name].data()
        try:
            data.decode(enc)
        except LookupError:
            # the codec name itself is unknown: abort the whole query
            raise util.Abort(_("unknown encoding '%s'") % enc)
        except UnicodeDecodeError:
            # this file is not valid in enc: leave it out
            pass
        else:
            decodable.append(name)
    return decodable
Matt Mackall
|
def copied(mctx, x):
    """``copied()``
    File that is recorded as being copied.
    """
    # validate the (empty) argument list like every other no-argument
    # predicate in this file; arguments used to be silently ignored here
    getargs(x, 0, 0, _("copied takes no arguments"))
    s = []
    for f in mctx.subset:
        p = mctx.ctx[f].parents()
        # selected when the first parent file context has another path
        if p and p[0].path() != f:
            s.append(f)
    return s
Matt Mackall
|
# Predicate name as written in a fileset expression -> implementing
# function.  func() looks names up here and calls the function with the
# match context and the unevaluated argument tree.
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'copied': copied,
    'deleted': deleted,
    'encoding': encoding,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
}
Matt Mackall
|
# Parse-node type -> evaluator.  getset() dispatches on the first item of
# a node and passes the rest of the node as arguments; a 'group' node is
# evaluated by handing its single child straight back to getset().
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}
class matchctx(object):
    """Evaluation context handed to every fileset evaluator.

    ctx is the change context being queried, subset the candidate files
    predicates filter, and status the status result precomputed by
    getfileset() (None when no status predicate is used).
    """

    def __init__(self, ctx, subset=None, status=None):
        self.ctx = ctx
        self.subset = subset
        self._status = status

    def status(self):
        """Return the status object given at construction."""
        return self._status

    def matcher(self, patterns):
        """Build a matcher for patterns via the underlying context."""
        return self.ctx.match(patterns)

    def filter(self, files):
        """Return the entries of files that are also in self.subset."""
        kept = []
        for name in files:
            if name in self.subset:
                kept.append(name)
        return kept

    def narrow(self, files):
        """Return a new matchctx restricted to files within the subset."""
        restricted = self.filter(files)
        return matchctx(self.ctx, restricted, self._status)
Matt Mackall
|
r14551 | |||
Matt Mackall
|
r14678 | def _intree(funcs, tree): | ||
if isinstance(tree, tuple): | ||||
if tree[0] == 'func' and tree[1][0] == 'symbol': | ||||
if tree[1][1] in funcs: | ||||
return True | ||||
for s in tree[1:]: | ||||
if _intree(funcs, s): | ||||
return True | ||||
return False | ||||
Matt Mackall
|
def getfileset(ctx, expr):
    """Parse the fileset expression expr and evaluate it against ctx.

    Raises error.ParseError when the parser stops before the end of expr.
    """
    tree, pos = parse(expr)
    if pos != len(expr):
        raise error.ParseError(_("invalid token"), pos)

    # status is only computed when the expression uses a status predicate
    statusfuncs = ['modified', 'added', 'removed', 'deleted',
                   'unknown', 'ignored', 'clean']
    if not _intree(statusfuncs, tree):
        status = None
        subset = ctx.walk(ctx.match([]))
    else:
        # unknown/ignored files are requested only if asked for by name
        wantunknown = _intree(['unknown'], tree)
        wantignored = _intree(['ignored'], tree)
        repo = ctx._repo
        status = repo.status(ctx.p1(), ctx,
                             unknown=wantunknown, ignored=wantignored,
                             clean=True)
        # the candidate set is every file in any status category
        subset = []
        for group in status:
            subset.extend(group)

    return getset(matchctx(ctx, subset, status), tree)
Matt Mackall
|
r14681 | |||
# tell hggettext to extract docstrings from these functions:
# (the docstrings of the predicates in `symbols` are therefore presumably
# translatable help text -- keep their wording stable when editing)
i18nfunctions = symbols.values()