|
|
# fileset.py - file set queries for mercurial
|
|
|
#
|
|
|
# Copyright 2010 Matt Mackall <mpm@selenic.com>
|
|
|
#
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
|
|
import re
|
|
|
import parser, error, util, merge
|
|
|
from i18n import _
|
|
|
|
|
|
elements = {
|
|
|
"(": (20, ("group", 1, ")"), ("func", 1, ")")),
|
|
|
"-": (5, ("negate", 19), ("minus", 5)),
|
|
|
"not": (10, ("not", 10)),
|
|
|
"!": (10, ("not", 10)),
|
|
|
"and": (5, None, ("and", 5)),
|
|
|
"&": (5, None, ("and", 5)),
|
|
|
"or": (4, None, ("or", 4)),
|
|
|
"|": (4, None, ("or", 4)),
|
|
|
"+": (4, None, ("or", 4)),
|
|
|
",": (2, None, ("list", 2)),
|
|
|
")": (0, None, None),
|
|
|
"symbol": (0, ("symbol",), None),
|
|
|
"string": (0, ("string",), None),
|
|
|
"end": (0, None, None),
|
|
|
}
|
|
|
|
|
|
keywords = set(['and', 'or', 'not'])
|
|
|
|
|
|
globchars = ".*{}[]?/\\_"
|
|
|
|
|
|
def tokenize(program):
|
|
|
pos, l = 0, len(program)
|
|
|
while pos < l:
|
|
|
c = program[pos]
|
|
|
if c.isspace(): # skip inter-token whitespace
|
|
|
pass
|
|
|
elif c in "(),-|&+!": # handle simple operators
|
|
|
yield (c, None, pos)
|
|
|
elif (c in '"\'' or c == 'r' and
|
|
|
program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
|
|
|
if c == 'r':
|
|
|
pos += 1
|
|
|
c = program[pos]
|
|
|
decode = lambda x: x
|
|
|
else:
|
|
|
decode = lambda x: x.decode('string-escape')
|
|
|
pos += 1
|
|
|
s = pos
|
|
|
while pos < l: # find closing quote
|
|
|
d = program[pos]
|
|
|
if d == '\\': # skip over escaped characters
|
|
|
pos += 2
|
|
|
continue
|
|
|
if d == c:
|
|
|
yield ('string', decode(program[s:pos]), s)
|
|
|
break
|
|
|
pos += 1
|
|
|
else:
|
|
|
raise error.ParseError(_("unterminated string"), s)
|
|
|
elif c.isalnum() or c in globchars or ord(c) > 127:
|
|
|
# gather up a symbol/keyword
|
|
|
s = pos
|
|
|
pos += 1
|
|
|
while pos < l: # find end of symbol
|
|
|
d = program[pos]
|
|
|
if not (d.isalnum() or d in globchars or ord(d) > 127):
|
|
|
break
|
|
|
pos += 1
|
|
|
sym = program[s:pos]
|
|
|
if sym in keywords: # operator keywords
|
|
|
yield (sym, None, s)
|
|
|
else:
|
|
|
yield ('symbol', sym, s)
|
|
|
pos -= 1
|
|
|
else:
|
|
|
raise error.ParseError(_("syntax error"), pos)
|
|
|
pos += 1
|
|
|
yield ('end', None, pos)
|
|
|
|
|
|
def parse(expr):
|
|
|
p = parser.parser(tokenize, elements)
|
|
|
return p.parse(expr)
|
|
|
|
|
|
def getstring(x, err):
|
|
|
if x and (x[0] == 'string' or x[0] == 'symbol'):
|
|
|
return x[1]
|
|
|
raise error.ParseError(err)
|
|
|
|
|
|
def getset(mctx, x):
|
|
|
if not x:
|
|
|
raise error.ParseError(_("missing argument"))
|
|
|
return methods[x[0]](mctx, *x[1:])
|
|
|
|
|
|
def stringset(mctx, x):
|
|
|
m = mctx.matcher([x])
|
|
|
return [f for f in mctx.subset if m(f)]
|
|
|
|
|
|
def andset(mctx, x, y):
|
|
|
return getset(mctx.narrow(getset(mctx, x)), y)
|
|
|
|
|
|
def orset(mctx, x, y):
|
|
|
# needs optimizing
|
|
|
xl = getset(mctx, x)
|
|
|
yl = getset(mctx, y)
|
|
|
return xl + [f for f in yl if f not in xl]
|
|
|
|
|
|
def notset(mctx, x):
|
|
|
s = set(getset(mctx, x))
|
|
|
return [r for r in mctx.subset if r not in s]
|
|
|
|
|
|
def minusset(mctx, x, y):
|
|
|
xl = getset(mctx, x)
|
|
|
yl = set(getset(mctx, y))
|
|
|
return [f for f in xl if f not in yl]
|
|
|
|
|
|
def listset(mctx, a, b):
|
|
|
raise error.ParseError(_("can't use a list in this context"))
|
|
|
|
|
|
def modified(mctx, x):
|
|
|
"""``modified()``
|
|
|
File that is modified according to status.
|
|
|
"""
|
|
|
# i18n: "modified" is a keyword
|
|
|
getargs(x, 0, 0, _("modified takes no arguments"))
|
|
|
s = mctx.status().modified
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def added(mctx, x):
|
|
|
"""``added()``
|
|
|
File that is added according to status.
|
|
|
"""
|
|
|
# i18n: "added" is a keyword
|
|
|
getargs(x, 0, 0, _("added takes no arguments"))
|
|
|
s = mctx.status().added
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def removed(mctx, x):
|
|
|
"""``removed()``
|
|
|
File that is removed according to status.
|
|
|
"""
|
|
|
# i18n: "removed" is a keyword
|
|
|
getargs(x, 0, 0, _("removed takes no arguments"))
|
|
|
s = mctx.status().removed
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def deleted(mctx, x):
|
|
|
"""``deleted()``
|
|
|
File that is deleted according to status.
|
|
|
"""
|
|
|
# i18n: "deleted" is a keyword
|
|
|
getargs(x, 0, 0, _("deleted takes no arguments"))
|
|
|
s = mctx.status().deleted
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def unknown(mctx, x):
|
|
|
"""``unknown()``
|
|
|
File that is unknown according to status. These files will only be
|
|
|
considered if this predicate is used.
|
|
|
"""
|
|
|
# i18n: "unknown" is a keyword
|
|
|
getargs(x, 0, 0, _("unknown takes no arguments"))
|
|
|
s = mctx.status().unknown
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def ignored(mctx, x):
|
|
|
"""``ignored()``
|
|
|
File that is ignored according to status. These files will only be
|
|
|
considered if this predicate is used.
|
|
|
"""
|
|
|
# i18n: "ignored" is a keyword
|
|
|
getargs(x, 0, 0, _("ignored takes no arguments"))
|
|
|
s = mctx.status().ignored
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def clean(mctx, x):
|
|
|
"""``clean()``
|
|
|
File that is clean according to status.
|
|
|
"""
|
|
|
# i18n: "clean" is a keyword
|
|
|
getargs(x, 0, 0, _("clean takes no arguments"))
|
|
|
s = mctx.status().clean
|
|
|
return [f for f in mctx.subset if f in s]
|
|
|
|
|
|
def func(mctx, a, b):
|
|
|
if a[0] == 'symbol' and a[1] in symbols:
|
|
|
return symbols[a[1]](mctx, b)
|
|
|
raise error.UnknownIdentifier(a[1], symbols.keys())
|
|
|
|
|
|
def getlist(x):
|
|
|
if not x:
|
|
|
return []
|
|
|
if x[0] == 'list':
|
|
|
return getlist(x[1]) + [x[2]]
|
|
|
return [x]
|
|
|
|
|
|
def getargs(x, min, max, err):
|
|
|
l = getlist(x)
|
|
|
if len(l) < min or len(l) > max:
|
|
|
raise error.ParseError(err)
|
|
|
return l
|
|
|
|
|
|
def binary(mctx, x):
|
|
|
"""``binary()``
|
|
|
File that appears to be binary (contains NUL bytes).
|
|
|
"""
|
|
|
# i18n: "binary" is a keyword
|
|
|
getargs(x, 0, 0, _("binary takes no arguments"))
|
|
|
return [f for f in mctx.existing() if util.binary(mctx.ctx[f].data())]
|
|
|
|
|
|
def exec_(mctx, x):
|
|
|
"""``exec()``
|
|
|
File that is marked as executable.
|
|
|
"""
|
|
|
# i18n: "exec" is a keyword
|
|
|
getargs(x, 0, 0, _("exec takes no arguments"))
|
|
|
return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'x']
|
|
|
|
|
|
def symlink(mctx, x):
|
|
|
"""``symlink()``
|
|
|
File that is marked as a symlink.
|
|
|
"""
|
|
|
# i18n: "symlink" is a keyword
|
|
|
getargs(x, 0, 0, _("symlink takes no arguments"))
|
|
|
return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'l']
|
|
|
|
|
|
def resolved(mctx, x):
|
|
|
"""``resolved()``
|
|
|
File that is marked resolved according to the resolve state.
|
|
|
"""
|
|
|
# i18n: "resolved" is a keyword
|
|
|
getargs(x, 0, 0, _("resolved takes no arguments"))
|
|
|
if mctx.ctx.rev() is not None:
|
|
|
return []
|
|
|
ms = merge.mergestate(mctx.ctx.repo())
|
|
|
return [f for f in mctx.subset if f in ms and ms[f] == 'r']
|
|
|
|
|
|
def unresolved(mctx, x):
|
|
|
"""``unresolved()``
|
|
|
File that is marked unresolved according to the resolve state.
|
|
|
"""
|
|
|
# i18n: "unresolved" is a keyword
|
|
|
getargs(x, 0, 0, _("unresolved takes no arguments"))
|
|
|
if mctx.ctx.rev() is not None:
|
|
|
return []
|
|
|
ms = merge.mergestate(mctx.ctx.repo())
|
|
|
return [f for f in mctx.subset if f in ms and ms[f] == 'u']
|
|
|
|
|
|
def hgignore(mctx, x):
|
|
|
"""``hgignore()``
|
|
|
File that matches the active .hgignore pattern.
|
|
|
"""
|
|
|
# i18n: "hgignore" is a keyword
|
|
|
getargs(x, 0, 0, _("hgignore takes no arguments"))
|
|
|
ignore = mctx.ctx.repo().dirstate._ignore
|
|
|
return [f for f in mctx.subset if ignore(f)]
|
|
|
|
|
|
def grep(mctx, x):
|
|
|
"""``grep(regex)``
|
|
|
File contains the given regular expression.
|
|
|
"""
|
|
|
try:
|
|
|
# i18n: "grep" is a keyword
|
|
|
r = re.compile(getstring(x, _("grep requires a pattern")))
|
|
|
except re.error, e:
|
|
|
raise error.ParseError(_('invalid match pattern: %s') % e)
|
|
|
return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())]
|
|
|
|
|
|
def _sizetomax(s):
|
|
|
try:
|
|
|
s = s.strip()
|
|
|
for k, v in util._sizeunits:
|
|
|
if s.endswith(k):
|
|
|
# max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
|
|
|
n = s[:-len(k)]
|
|
|
inc = 1.0
|
|
|
if "." in n:
|
|
|
inc /= 10 ** len(n.split(".")[1])
|
|
|
return int((float(n) + inc) * v) - 1
|
|
|
# no extension, this is a precise value
|
|
|
return int(s)
|
|
|
except ValueError:
|
|
|
raise error.ParseError(_("couldn't parse size: %s") % s)
|
|
|
|
|
|
def size(mctx, x):
|
|
|
"""``size(expression)``
|
|
|
File size matches the given expression. Examples:
|
|
|
|
|
|
- 1k (files from 1024 to 2047 bytes)
|
|
|
- < 20k (files less than 20480 bytes)
|
|
|
- >= .5MB (files at least 524288 bytes)
|
|
|
- 4k - 1MB (files from 4096 bytes to 1048576 bytes)
|
|
|
"""
|
|
|
|
|
|
# i18n: "size" is a keyword
|
|
|
expr = getstring(x, _("size requires an expression")).strip()
|
|
|
if '-' in expr: # do we have a range?
|
|
|
a, b = expr.split('-', 1)
|
|
|
a = util.sizetoint(a)
|
|
|
b = util.sizetoint(b)
|
|
|
m = lambda x: x >= a and x <= b
|
|
|
elif expr.startswith("<="):
|
|
|
a = util.sizetoint(expr[2:])
|
|
|
m = lambda x: x <= a
|
|
|
elif expr.startswith("<"):
|
|
|
a = util.sizetoint(expr[1:])
|
|
|
m = lambda x: x < a
|
|
|
elif expr.startswith(">="):
|
|
|
a = util.sizetoint(expr[2:])
|
|
|
m = lambda x: x >= a
|
|
|
elif expr.startswith(">"):
|
|
|
a = util.sizetoint(expr[1:])
|
|
|
m = lambda x: x > a
|
|
|
elif expr[0].isdigit or expr[0] == '.':
|
|
|
a = util.sizetoint(expr)
|
|
|
b = _sizetomax(expr)
|
|
|
m = lambda x: x >= a and x <= b
|
|
|
else:
|
|
|
raise error.ParseError(_("couldn't parse size: %s") % expr)
|
|
|
|
|
|
return [f for f in mctx.existing() if m(mctx.ctx[f].size())]
|
|
|
|
|
|
def encoding(mctx, x):
|
|
|
"""``encoding(name)``
|
|
|
File can be successfully decoded with the given character
|
|
|
encoding. May not be useful for encodings other than ASCII and
|
|
|
UTF-8.
|
|
|
"""
|
|
|
|
|
|
# i18n: "encoding" is a keyword
|
|
|
enc = getstring(x, _("encoding requires an encoding name"))
|
|
|
|
|
|
s = []
|
|
|
for f in mctx.existing():
|
|
|
d = mctx.ctx[f].data()
|
|
|
try:
|
|
|
d.decode(enc)
|
|
|
except LookupError:
|
|
|
raise util.Abort(_("unknown encoding '%s'") % enc)
|
|
|
except UnicodeDecodeError:
|
|
|
continue
|
|
|
s.append(f)
|
|
|
|
|
|
return s
|
|
|
|
|
|
def eol(mctx, x):
|
|
|
"""``eol(style)``
|
|
|
File contains newlines of the given style (dos, unix, mac). Binary
|
|
|
files are excluded, files with mixed line endings match multiple
|
|
|
styles.
|
|
|
"""
|
|
|
|
|
|
# i18n: "encoding" is a keyword
|
|
|
enc = getstring(x, _("encoding requires an encoding name"))
|
|
|
|
|
|
s = []
|
|
|
for f in mctx.existing():
|
|
|
d = mctx.ctx[f].data()
|
|
|
if util.binary(d):
|
|
|
continue
|
|
|
if (enc == 'dos' or enc == 'win') and '\r\n' in d:
|
|
|
s.append(f)
|
|
|
elif enc == 'unix' and re.search('(?<!\r)\n', d):
|
|
|
s.append(f)
|
|
|
elif enc == 'mac' and re.search('\r(?!\n)', d):
|
|
|
s.append(f)
|
|
|
return s
|
|
|
|
|
|
def copied(mctx, x):
|
|
|
"""``copied()``
|
|
|
File that is recorded as being copied.
|
|
|
"""
|
|
|
# i18n: "copied" is a keyword
|
|
|
getargs(x, 0, 0, _("copied takes no arguments"))
|
|
|
s = []
|
|
|
for f in mctx.subset:
|
|
|
p = mctx.ctx[f].parents()
|
|
|
if p and p[0].path() != f:
|
|
|
s.append(f)
|
|
|
return s
|
|
|
|
|
|
def subrepo(mctx, x):
|
|
|
"""``subrepo([pattern])``
|
|
|
Subrepositories whose paths match the given pattern.
|
|
|
"""
|
|
|
# i18n: "subrepo" is a keyword
|
|
|
getargs(x, 0, 1, _("subrepo takes at most one argument"))
|
|
|
ctx = mctx.ctx
|
|
|
sstate = sorted(ctx.substate)
|
|
|
if x:
|
|
|
# i18n: "subrepo" is a keyword
|
|
|
pat = getstring(x, _("subrepo requires a pattern or no arguments"))
|
|
|
|
|
|
import match as matchmod # avoid circular import issues
|
|
|
fast = not matchmod.patkind(pat)
|
|
|
if fast:
|
|
|
def m(s):
|
|
|
return (s == pat)
|
|
|
else:
|
|
|
m = matchmod.match(ctx.repo().root, '', [pat], ctx=ctx)
|
|
|
return [sub for sub in sstate if m(sub)]
|
|
|
else:
|
|
|
return [sub for sub in sstate]
|
|
|
|
|
|
symbols = {
|
|
|
'added': added,
|
|
|
'binary': binary,
|
|
|
'clean': clean,
|
|
|
'copied': copied,
|
|
|
'deleted': deleted,
|
|
|
'encoding': encoding,
|
|
|
'eol': eol,
|
|
|
'exec': exec_,
|
|
|
'grep': grep,
|
|
|
'ignored': ignored,
|
|
|
'hgignore': hgignore,
|
|
|
'modified': modified,
|
|
|
'removed': removed,
|
|
|
'resolved': resolved,
|
|
|
'size': size,
|
|
|
'symlink': symlink,
|
|
|
'unknown': unknown,
|
|
|
'unresolved': unresolved,
|
|
|
'subrepo': subrepo,
|
|
|
}
|
|
|
|
|
|
methods = {
|
|
|
'string': stringset,
|
|
|
'symbol': stringset,
|
|
|
'and': andset,
|
|
|
'or': orset,
|
|
|
'minus': minusset,
|
|
|
'list': listset,
|
|
|
'group': getset,
|
|
|
'not': notset,
|
|
|
'func': func,
|
|
|
}
|
|
|
|
|
|
class matchctx(object):
|
|
|
def __init__(self, ctx, subset=None, status=None):
|
|
|
self.ctx = ctx
|
|
|
self.subset = subset
|
|
|
self._status = status
|
|
|
def status(self):
|
|
|
return self._status
|
|
|
def matcher(self, patterns):
|
|
|
return self.ctx.match(patterns)
|
|
|
def filter(self, files):
|
|
|
return [f for f in files if f in self.subset]
|
|
|
def existing(self):
|
|
|
if self._status is not None:
|
|
|
removed = set(self._status[3])
|
|
|
unknown = set(self._status[4] + self._status[5])
|
|
|
else:
|
|
|
removed = set()
|
|
|
unknown = set()
|
|
|
return (f for f in self.subset
|
|
|
if (f in self.ctx and f not in removed) or f in unknown)
|
|
|
def narrow(self, files):
|
|
|
return matchctx(self.ctx, self.filter(files), self._status)
|
|
|
|
|
|
def _intree(funcs, tree):
|
|
|
if isinstance(tree, tuple):
|
|
|
if tree[0] == 'func' and tree[1][0] == 'symbol':
|
|
|
if tree[1][1] in funcs:
|
|
|
return True
|
|
|
for s in tree[1:]:
|
|
|
if _intree(funcs, s):
|
|
|
return True
|
|
|
return False
|
|
|
|
|
|
# filesets using matchctx.existing()
|
|
|
_existingcallers = [
|
|
|
'binary',
|
|
|
'exec',
|
|
|
'grep',
|
|
|
'size',
|
|
|
'symlink',
|
|
|
]
|
|
|
|
|
|
def getfileset(ctx, expr):
|
|
|
tree, pos = parse(expr)
|
|
|
if (pos != len(expr)):
|
|
|
raise error.ParseError(_("invalid token"), pos)
|
|
|
|
|
|
# do we need status info?
|
|
|
if (_intree(['modified', 'added', 'removed', 'deleted',
|
|
|
'unknown', 'ignored', 'clean'], tree) or
|
|
|
# Using matchctx.existing() on a workingctx requires us to check
|
|
|
# for deleted files.
|
|
|
(ctx.rev() is None and _intree(_existingcallers, tree))):
|
|
|
unknown = _intree(['unknown'], tree)
|
|
|
ignored = _intree(['ignored'], tree)
|
|
|
|
|
|
r = ctx.repo()
|
|
|
status = r.status(ctx.p1(), ctx,
|
|
|
unknown=unknown, ignored=ignored, clean=True)
|
|
|
subset = []
|
|
|
for c in status:
|
|
|
subset.extend(c)
|
|
|
else:
|
|
|
status = None
|
|
|
subset = list(ctx.walk(ctx.match([])))
|
|
|
|
|
|
return getset(matchctx(ctx, subset, status), tree)
|
|
|
|
|
|
# tell hggettext to extract docstrings from these functions:
|
|
|
i18nfunctions = symbols.values()
|
|
|
|