##// END OF EJS Templates
merge with stable
merge with stable

File last commit:

r14690:15faf0e6 stable
r14697:b1880474 merge default
Show More
fileset.py
426 lines | 12.0 KiB | text/x-python | PythonLexer
Matt Mackall
filesets: introduce basic fileset expression parser
r14511 # fileset.py - file set queries for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Mackall
fileset: add grep predicate
r14682 import parser, error, util, merge, re
Matt Mackall
filesets: introduce basic fileset expression parser
r14511 from i18n import _
# Token table handed to parser.parser() together with tokenize() (see the
# "parse" assignment), keyed by token type.
# NOTE(review): each value looks like (binding strength, prefix action,
# infix action), with None where that form is not allowed -- confirm against
# the parser module, which is not visible from this file.
elements = {
"(": (20, ("group", 1, ")"), ("func", 1, ")")),
"-": (5, ("negate", 19), ("minus", 5)),
"not": (10, ("not", 10)),
"!": (10, ("not", 10)),
"and": (5, None, ("and", 5)),
"&": (5, None, ("and", 5)),
"or": (4, None, ("or", 4)),
"|": (4, None, ("or", 4)),
"+": (4, None, ("or", 4)),
",": (2, None, ("list", 2)),
")": (0, None, None),
"symbol": (0, ("symbol",), None),
"string": (0, ("string",), None),
"end": (0, None, None),
}
# words that tokenize() emits as operator tokens rather than as symbols
keywords = set(['and', 'or', 'not'])
Matt Mackall
fileset: basic pattern and boolean support...
# characters that, besides alphanumerics and bytes above 127, tokenize()
# accepts inside a bare symbol
globchars = ".*{}[]?/\\"
Matt Mackall
filesets: introduce basic fileset expression parser
def tokenize(program):
    """Split a fileset expression into (type, value, position) tokens.

    Yields operator tokens (value None), 'string' tokens for quoted text,
    'symbol' tokens for bare words/patterns, keyword tokens for the words
    in ``keywords``, and finally an ('end', None, pos) token.

    Raises error.ParseError on an unterminated string or on a character
    that cannot start any token.
    """
    def symchar(ch):
        # may this character be part of a bare symbol?
        return ch.isalnum() or ch in globchars or ord(ch) > 127

    pos, end = 0, len(program)
    while pos < end:
        ch = program[pos]
        if ch.isspace():
            # whitespace only separates tokens
            pass
        elif ch in "(),-|&+!":
            # single-character operators
            yield (ch, None, pos)
        elif (ch in '"\'' or ch == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')):
            # quoted string, optionally with a raw (r'...') prefix
            if ch == 'r':
                # raw string: step onto the quote and keep the text as is
                pos += 1
                ch = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            start = pos
            while pos < end:
                cur = program[pos]
                if cur == '\\':
                    # a backslash protects the character after it
                    pos += 2
                    continue
                if cur == ch:
                    # matching closing quote found
                    yield ('string', decode(program[start:pos]), start)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), start)
        elif symchar(ch):
            # collect a symbol or keyword
            start = pos
            pos += 1
            while pos < end and symchar(program[pos]):
                pos += 1
            word = program[start:pos]
            if word in keywords:
                yield (word, None, start)
            else:
                yield ('symbol', word, start)
            # compensate for the unconditional increment below
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
# parse function used by getfileset(): the bound "parse" method of a parser
# built from tokenize() and the elements table
parse = parser.parser(tokenize, elements).parse
Matt Mackall
fileset: basic pattern and boolean support...
def getstring(x, err):
    """Return the text of a 'string' or 'symbol' parse node.

    Raises error.ParseError(err) when x is empty or is another node type.
    """
    if x and x[0] in ('string', 'symbol'):
        return x[1]
    raise error.ParseError(err)
def getset(mctx, x):
    """Evaluate parse node x against mctx via the ``methods`` table.

    Raises error.ParseError when the node is empty.
    """
    if not x:
        raise error.ParseError(_("missing argument"))
    kind, args = x[0], x[1:]
    handler = methods[kind]
    return handler(mctx, *args)
def stringset(mctx, x):
    """Return the files of mctx.subset accepted by a matcher for pattern x."""
    match = mctx.matcher([x])
    selected = []
    for name in mctx.subset:
        if match(name):
            selected.append(name)
    return selected
def andset(mctx, x, y):
    """Evaluate y against the context narrowed to the result of x."""
    left = getset(mctx, x)
    narrowed = mctx.narrow(left)
    return getset(narrowed, y)
def orset(mctx, x, y):
    """Return the result of x followed by the entries of y not already in x.

    Order is preserved: everything from x first, then the new entries
    from y in their original order.
    """
    xl = getset(mctx, x)
    yl = getset(mctx, y)
    # test membership against a set so the union is linear instead of
    # scanning the list xl once per entry of yl
    seen = set(xl)
    return xl + [f for f in yl if f not in seen]
def notset(mctx, x):
    """Return the files of mctx.subset that are not in the result of x."""
    excluded = set(getset(mctx, x))
    kept = []
    for name in mctx.subset:
        if name not in excluded:
            kept.append(name)
    return kept
def listset(mctx, a, b):
    """Handler for 'list' nodes evaluated as a set: always a parse error."""
    msg = _("can't use a list in this context")
    raise error.ParseError(msg)
Matt Mackall
fileset: add support for file status predicates...
def modified(mctx, x):
    """``modified()``
    File that is modified according to status.
    """
    getargs(x, 0, 0, _("modified takes no arguments"))
    # entry 0 of the status result is the list this predicate selects from
    wanted = mctx.status()[0]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
def added(mctx, x):
    """``added()``
    File that is added according to status.
    """
    getargs(x, 0, 0, _("added takes no arguments"))
    # entry 1 of the status result is the list this predicate selects from
    wanted = mctx.status()[1]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
def removed(mctx, x):
    """``removed()``
    File that is removed according to status.
    """
    getargs(x, 0, 0, _("removed takes no arguments"))
    # entry 2 of the status result is the list this predicate selects from
    wanted = mctx.status()[2]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
def deleted(mctx, x):
    """``deleted()``
    File that is deleted according to status.
    """
    getargs(x, 0, 0, _("deleted takes no arguments"))
    # entry 3 of the status result is the list this predicate selects from
    wanted = mctx.status()[3]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
def unknown(mctx, x):
    """``unknown()``
    File that is unknown according to status. These files will only be
    considered if this predicate is used.
    """
    getargs(x, 0, 0, _("unknown takes no arguments"))
    # entry 4 of the status result is the list this predicate selects from
    wanted = mctx.status()[4]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
def ignored(mctx, x):
    """``ignored()``
    File that is ignored according to status. These files will only be
    considered if this predicate is used.
    """
    getargs(x, 0, 0, _("ignored takes no arguments"))
    # entry 5 of the status result is the list this predicate selects from
    wanted = mctx.status()[5]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
def clean(mctx, x):
    """``clean()``
    File that is clean according to status.
    """
    getargs(x, 0, 0, _("clean takes no arguments"))
    # entry 6 of the status result is the list this predicate selects from
    wanted = mctx.status()[6]
    matched = []
    for name in mctx.subset:
        if name in wanted:
            matched.append(name)
    return matched
Matt Mackall
fileset: add some basic predicates
def func(mctx, a, b):
    """Call the predicate named by symbol node a with argument tree b.

    Raises error.ParseError when a is not a symbol registered in
    ``symbols``.
    """
    name = a[1]
    if a[0] != 'symbol' or name not in symbols:
        raise error.ParseError(_("not a function: %s") % name)
    return symbols[name](mctx, b)
def getlist(x):
    """Flatten a left-nested ('list', rest, item) tree into a flat list.

    An empty node gives [], and any non-list node gives a one-element list.
    """
    items = []
    # peel items off the right-hand side while walking down the left spine
    while x and x[0] == 'list':
        items.append(x[2])
        x = x[1]
    if x:
        items.append(x)
    # items were collected last-to-first
    items.reverse()
    return items
def getargs(x, min, max, err):
    """Return the argument list of tree x, checking its length.

    Raises error.ParseError(err) unless min <= number of arguments <= max.
    """
    args = getlist(x)
    if not (min <= len(args) <= max):
        raise error.ParseError(err)
    return args
def binary(mctx, x):
    """``binary()``
    File that appears to be binary (contains NUL bytes).
    """
    getargs(x, 0, 0, _("binary takes no arguments"))
    # util.binary() decides from the file's data in the context
    return [f for f in mctx.subset if util.binary(mctx.ctx[f].data())]
def exec_(mctx, x):
    """``exec()``
    File that is marked as executable.
    """
    getargs(x, 0, 0, _("exec takes no arguments"))
    matched = []
    for name in mctx.subset:
        # keep files whose flags are exactly 'x'
        if mctx.ctx.flags(name) == 'x':
            matched.append(name)
    return matched
def symlink(mctx, x):
    """``symlink()``
    File that is marked as a symlink.
    """
    getargs(x, 0, 0, _("symlink takes no arguments"))
    matched = []
    for name in mctx.subset:
        # keep files whose flags are exactly 'l'
        if mctx.ctx.flags(name) == 'l':
            matched.append(name)
    return matched
Matt Mackall
fileset: add resolved and unresolved predicates
def resolved(mctx, x):
    """``resolved()``
    File that is marked resolved according to the resolve state.
    """
    getargs(x, 0, 0, _("resolved takes no arguments"))
    # the merge state is only consulted when the context has no revision
    if mctx.ctx.rev() is not None:
        return []
    ms = merge.mergestate(mctx.ctx._repo)
    matched = []
    for name in mctx.subset:
        if name in ms and ms[name] == 'r':
            matched.append(name)
    return matched
def unresolved(mctx, x):
    """``unresolved()``
    File that is marked unresolved according to the resolve state.
    """
    getargs(x, 0, 0, _("unresolved takes no arguments"))
    # the merge state is only consulted when the context has no revision
    if mctx.ctx.rev() is not None:
        return []
    ms = merge.mergestate(mctx.ctx._repo)
    matched = []
    for name in mctx.subset:
        if name in ms and ms[name] == 'u':
            matched.append(name)
    return matched
Matt Mackall
fileset: add hgignore
def hgignore(mctx, x):
    """``hgignore()``
    File that matches the active .hgignore pattern.
    """
    getargs(x, 0, 0, _("hgignore takes no arguments"))
    # reuse the ignore matcher held by the repository's dirstate
    ignore = mctx.ctx._repo.dirstate._ignore
    return [f for f in mctx.subset if ignore(f)]
Matt Mackall
fileset: add grep predicate
def grep(mctx, x):
    """``grep(regex)``
    File contains the given regular expression.
    """
    pattern = getstring(x, _("grep requires a pattern"))
    # compile once, then search each file's data
    search = re.compile(pattern).search
    hits = []
    for name in mctx.subset:
        if search(mctx.ctx[name].data()):
            hits.append(name)
    return hits
Matt Mackall
fileset: add size() predicate
r14683 _units = dict(k=2**10, K=2**10, kB=2**10, KB=2**10,
Matt Mackall
fileset: drop backwards SI size units...
r14689 M=2**20, MB=2**20, G=2**30, GB=2**30)
Matt Mackall
fileset: add size() predicate
r14683
def _sizetoint(s):
    """Convert a size such as ``20``, ``4k`` or ``.5MB`` to a byte count.

    Surrounding whitespace is ignored. When the text ends with a suffix
    from ``_units`` the (possibly fractional) number before it is scaled by
    that unit and truncated to an int; otherwise the text must be a plain
    integer.

    Raises error.ParseError when the text is not a valid number.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                return int(float(s[:-len(k)]) * v)
        return int(s)
    except ValueError:
        # Report bad input as a parse error, as the rest of this module
        # does. The text goes into the message because ParseError's second
        # argument is used as a position elsewhere in this file.
        raise error.ParseError(_("couldn't parse size: %s") % s)
def _sizetomax(s):
    """Return the largest byte count that still rounds down to size s.

    With a unit suffix the upper bound is one less than the next step at
    the precision written: max(4k) = 5k - 1 and max(4.5k) = 4.6k - 1.
    Without a suffix the value is exact and is returned unchanged.

    Raises error.ParseError when the text is not a valid number.
    """
    try:
        s = s.strip()
        for k, v in _units.items():
            if s.endswith(k):
                n = s[:-len(k)]
                # one unit in the last written digit: 1 for "4", .1 for "4.5"
                inc = 1.0
                if "." in n:
                    inc /= 10 ** len(n.split(".")[1])
                return int((float(n) + inc) * v) - 1
        # no extension, this is a precise value
        return int(s)
    except ValueError:
        # Report bad input as a parse error, as the rest of this module
        # does. The text goes into the message because ParseError's second
        # argument is used as a position elsewhere in this file.
        raise error.ParseError(_("couldn't parse size: %s") % s)
def size(mctx, x):
    """``size(expression)``
    File size matches the given expression. Examples:
    - 1k (files from 1024 to 2047 bytes)
    - < 20k (files less than 20480 bytes)
    - >= .5MB (files at least 524288 bytes)
    - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
    """
    expr = getstring(x, _("size requires an expression")).strip()
    if '-' in expr:
        # explicit range "low - high", both ends inclusive
        a, b = expr.split('-', 1)
        a = _sizetoint(a)
        b = _sizetoint(b)
        m = lambda x: x >= a and x <= b
    elif expr.startswith("<="):
        a = _sizetoint(expr[2:])
        m = lambda x: x <= a
    elif expr.startswith("<"):
        a = _sizetoint(expr[1:])
        m = lambda x: x < a
    elif expr.startswith(">="):
        a = _sizetoint(expr[2:])
        m = lambda x: x >= a
    elif expr.startswith(">"):
        a = _sizetoint(expr[1:])
        m = lambda x: x > a
    elif expr[0:1].isdigit() or expr[0:1] == '.':
        # Bare size: match everything that rounds down to it. Slicing
        # instead of indexing keeps an empty expression from raising
        # IndexError, and isdigit must be called -- the bare method object
        # is always true, which made the else branch unreachable.
        a = _sizetoint(expr)
        b = _sizetomax(expr)
        m = lambda x: x >= a and x <= b
    else:
        raise error.ParseError(_("couldn't parse size: %s") % expr)
    return [f for f in mctx.subset if m(mctx.ctx[f].size())]
Matt Mackall
fileset: add encoding() predicate
def encoding(mctx, x):
    """``encoding(name)``
    File can be successfully decoded with the given character
    encoding. May not be useful for encodings other than ASCII and
    UTF-8.
    """
    enc = getstring(x, _("encoding requires an encoding name"))
    decodable = []
    for name in mctx.subset:
        data = mctx.ctx[name].data()
        try:
            data.decode(enc)
        except LookupError:
            # the codec name itself is not known
            raise util.Abort(_("unknown encoding '%s'") % enc)
        except UnicodeDecodeError:
            # this file is not valid in the requested encoding
            pass
        else:
            decodable.append(name)
    return decodable
Matt Mackall
fileset: add copied predicate
def copied(mctx, x):
    """``copied()``
    File that is recorded as being copied.
    """
    # reject stray arguments like every other zero-argument predicate here
    getargs(x, 0, 0, _("copied takes no arguments"))
    s = []
    for f in mctx.subset:
        p = mctx.ctx[f].parents()
        # selected when the first parent file context has a different path
        if p and p[0].path() != f:
            s.append(f)
    return s
Matt Mackall
fileset: add some basic predicates
# predicate name (as written in a fileset expression) -> implementation;
# consulted by func()
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'copied': copied,
    'deleted': deleted,
    'encoding': encoding,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
}
Matt Mackall
fileset: basic pattern and boolean support...
# parse node type -> evaluator; consulted by getset()
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}
class matchctx(object):
    """Evaluation context: a change context, the candidate files and an
    optional precomputed status result."""

    def __init__(self, ctx, subset=None, status=None):
        self.ctx, self.subset, self._status = ctx, subset, status

    def status(self):
        """Return the status object given at construction (may be None)."""
        return self._status

    def matcher(self, patterns):
        """Build a matcher for patterns through the underlying context."""
        return self.ctx.match(patterns)

    def filter(self, files):
        """Return the entries of files that are in the current subset."""
        kept = []
        for name in files:
            if name in self.subset:
                kept.append(name)
        return kept

    def narrow(self, files):
        """Return a new matchctx whose subset is files filtered by this
        one, sharing the same context and status."""
        return matchctx(self.ctx, self.filter(files), self._status)
Matt Mackall
fileset: basic pattern and boolean support...
r14551
Matt Mackall
fileset: prescan parse tree to optimize status usage...
r14678 def _intree(funcs, tree):
if isinstance(tree, tuple):
if tree[0] == 'func' and tree[1][0] == 'symbol':
if tree[1][1] in funcs:
return True
for s in tree[1:]:
if _intree(funcs, s):
return True
return False
Matt Mackall
fileset: drop matchfn...
def getfileset(ctx, expr):
    """Evaluate fileset expression expr against change context ctx.

    Returns whatever getset() yields for the parsed tree. Raises
    error.ParseError when the parser stops before the end of expr.
    """
    tree, pos = parse(expr)
    if pos != len(expr):
        raise error.ParseError(_("invalid token"), pos)

    # do we need status info?
    if _intree(['modified', 'added', 'removed', 'deleted',
                'unknown', 'ignored', 'clean'], tree):
        # only request unknown/ignored files when the expression uses the
        # matching predicate; the locals are named so they do not shadow
        # the module-level unknown()/ignored() predicates
        wantunknown = _intree(['unknown'], tree)
        wantignored = _intree(['ignored'], tree)
        r = ctx._repo
        status = r.status(ctx.p1(), ctx,
                          unknown=wantunknown, ignored=wantignored,
                          clean=True)
        # the candidate set is every file mentioned by any status list
        subset = []
        for c in status:
            subset.extend(c)
    else:
        status = None
        subset = ctx.walk(ctx.match([]))
    return getset(matchctx(ctx, subset, status), tree)
Matt Mackall
fileset: add some function help text
r14681
# tell hggettext to extract docstrings from these functions (every
# predicate registered in the symbols table):
i18nfunctions = symbols.values()