revsetlang.py
771 lines
| 25.0 KiB
| text/x-python
|
PythonLexer
/ mercurial / revsetlang.py
Yuya Nishihara
|
r31024 | # revsetlang.py - parser, tokenizer and utility for revision set language | ||
# | ||||
# Copyright 2010 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import | ||||
import string | ||||
from .i18n import _ | ||||
from . import ( | ||||
error, | ||||
node, | ||||
parser, | ||||
pycompat, | ||||
Augie Fackler
|
r31606 | util, | ||
Yuya Nishihara
|
r31024 | ) | ||
Yuya Nishihara
|
r37102 | from .utils import ( | ||
stringutil, | ||||
) | ||||
Yuya Nishihara
|
r31024 | |||
# Operator table handed to parser.parser() by _parsewith().
# Each entry maps a token type to a 5-tuple:
#   (binding-strength, primary, prefix, infix, suffix)
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix

    # grouping, function call, subscript and relation
    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    "[": (21, None, None, ("subscript", 1, "]"), None),
    "#": (21, None, None, ("relation", 21), None),
    "##": (20, None, None, ("_concat", 20), None),

    # ancestor / parent / minus
    "~": (18, None, None, ("ancestor", 18), None),
    "^": (18, None, None, ("parent", 18), "parentpost"),
    "-": (5, None, ("negate", 19), ("minus", 5), None),

    # ranges ('::' and '..' share the same entry contents)
    "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
           "dagrangepost"),
    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),

    # boolean operators, keyword and punctuation spellings
    "not": (10, None, ("not", 10), None, None),
    "!": (10, None, ("not", 10), None, None),
    "and": (5, None, None, ("and", 5), None),
    "&": (5, None, None, ("and", 5), None),
    "%": (5, None, None, ("only", 5), "onlypost"),
    "or": (4, None, None, ("or", 4), None),
    "|": (4, None, None, ("or", 4), None),
    "+": (4, None, None, ("or", 4), None),

    # argument syntax
    "=": (3, None, None, ("keyvalue", 3), None),
    ",": (2, None, None, ("list", 2), None),

    # closers, atoms and end-of-input
    ")": (0, None, None, None, None),
    "]": (0, None, None, None, None),
    "symbol": (0, "symbol", None, None, None),
    "string": (0, "string", None, None, None),
    "end": (0, None, None, None, None),
}
Martin von Zweigbergk
|
# words that tokenize() emits as operator tokens instead of symbols
keywords = {'and', 'or', 'not'}

# table consulted by _optimize() to read the '_weight' attribute of a
# function; it is empty in this module
symbols = {}

# characters that start (and terminate) a quoted string
_quoteletters = {'"', "'"}

# operators made of exactly one character
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))

# default set of valid characters for the initial letter of symbols
_syminitletters = (
    set(pycompat.iterbytestr(string.ascii_letters.encode('ascii') +
                             string.digits.encode('ascii') +
                             '._@'))
    | set(map(pycompat.bytechr, xrange(128, 256)))
)

# default set of valid characters for non-initial letters of symbols
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
Yuya Nishihara
|
r31024 | |||
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    '''
    Parse a revset statement into a stream of tokens

    Tokens are yielded as ``(type, value, position)`` tuples and the stream
    is always terminated by an ``'end'`` token.

    ``program`` must be bytes; anything else raises ProgrammingError.

    ``lookup``, if given, is called with a candidate name and should return
    a true value when that name is a known symbol. It is used to accept
    old-style ``a:b`` ranges and names containing ``-`` verbatim.

    ``syminitletters`` is the set of valid characters for the initial
    letter of symbols.

    By default, character ``c`` is recognized as valid for initial
    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.

    ``symletters`` is the set of valid characters for non-initial
    letters of symbols.

    By default, character ``c`` is recognized as valid for non-initial
    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.

    Raises ParseError on an unterminated string or an unexpected character.

    Check that @ is a valid unquoted token character (issue3686):

    >>> list(tokenize(b"@::"))
    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]

    The '-' of a spaceless expression is reported at its own position:

    >>> list(tokenize(b"a-b"))
    [('symbol', 'a', 0), ('-', None, 1), ('symbol', 'b', 2), ('end', None, 3)]
    '''
    if not isinstance(program, bytes):
        raise error.ProgrammingError('revset statement must be bytes, got %r'
                                     % program)
    program = pycompat.bytestr(program)
    if syminitletters is None:
        syminitletters = _syminitletters
    if symletters is None:
        symletters = _symletters

    if program and lookup:
        # attempt to parse old-style ranges first to deal with
        # things like old-tag which contain query metacharacters
        parts = program.split(':', 1)
        if all(lookup(sym) for sym in parts if sym):
            if parts[0]:
                yield ('symbol', parts[0], 0)
            if len(parts) > 1:
                s = len(parts[0])
                yield (':', None, s)
                if parts[1]:
                    yield ('symbol', parts[1], s + 1)
            yield ('end', None, len(program))
            return

    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
            yield ('::', None, pos)
            pos += 1 # skip ahead
        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
            yield ('..', None, pos)
            pos += 1 # skip ahead
        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
            yield ('##', None, pos)
            pos += 1 # skip ahead
        elif c in _simpleopletters: # handle simple operators
            yield (c, None, pos)
        elif (c in _quoteletters or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step over the prefix and keep escapes as-is
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = parser.unescapestr
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                raise error.ParseError(_("unterminated string"), s)
        # gather up a symbol/keyword
        elif c in syminitletters:
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if d not in symletters:
                    break
                if d == '.' and program[pos - 1] == '.': # special case for ..
                    pos -= 1
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            elif '-' in sym:
                # some jerk gave us foo-bar-baz, try to check if it's a symbol
                if lookup and lookup(sym):
                    # looks like a real symbol
                    yield ('symbol', sym, s)
                else:
                    # looks like an expression
                    parts = sym.split('-')
                    for p in parts[:-1]:
                        if p: # possible consecutive -
                            yield ('symbol', p, s)
                        s += len(p)
                        # 's' now points at the '-' itself; 'pos' is the end
                        # of the whole dashed word and would misplace errors
                        yield ('-', None, s)
                        s += 1
                    if parts[-1]: # possible trailing -
                        yield ('symbol', parts[-1], s)
            else:
                yield ('symbol', sym, s)
            # the symbol loop stopped one past the last consumed character
            pos -= 1
        else:
            raise error.ParseError(_("syntax error in revset '%s'") %
                                   program, pos)
        pos += 1
    yield ('end', None, pos)
# helpers | ||||
_notset = object() | ||||
def getsymbol(x):
    """Return the name stored in a ``('symbol', name)`` node

    Raises ParseError if ``x`` is empty or is not a symbol node.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]
def getstring(x, err):
    """Return the text of a ``'string'`` or ``'symbol'`` node

    Raises ParseError(err) for an empty tree or any other node type.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
def getinteger(x, err, default=_notset):
    """Return the integer value of a string/symbol node

    If ``x`` is empty and ``default`` was supplied, ``default`` is returned.
    Raises ParseError(err) if the node text is not a valid integer.
    """
    if not x and default is not _notset:
        return default
    try:
        text = getstring(x, err)
        return int(text)
    except ValueError:
        raise error.ParseError(err)
Denis Laxalde
|
def getboolean(x, err):
    """Return the boolean that stringutil.parsebool() reads from symbol ``x``

    Raises ParseError(err) when parsebool() returns None.
    """
    parsed = stringutil.parsebool(getsymbol(x))
    if parsed is None:
        raise error.ParseError(err)
    return parsed
Yuya Nishihara
|
def getlist(x):
    """Return the elements of tree ``x`` as a Python list

    An empty tree gives ``[]``, a ``'list'`` node gives its children and
    any other node gives a one-element list holding the node itself.
    """
    if not x:
        return []
    return list(x[1:]) if x[0] == 'list' else [x]
def getrange(x, err):
    """Return the ``(first, last)`` operands of a range node

    A missing side is reported as None. Raises ParseError(err) if ``x`` is
    empty or is not one of the four range node types.
    """
    if not x:
        raise error.ParseError(err)
    kind = x[0]
    if kind == 'rangeall':
        return None, None
    if kind == 'rangepost':
        return x[1], None
    if kind == 'rangepre':
        return None, x[1]
    if kind == 'range':
        return x[1], x[2]
    raise error.ParseError(err)
def getargs(x, min, max, err):
    """Return the argument list of ``x`` after checking its length

    Raises ParseError(err) when there are fewer than ``min`` arguments, or
    more than ``max`` arguments (a negative ``max`` disables the upper bound).
    """
    args = getlist(x)
    count = len(args)
    if count < min or 0 <= max < count:
        raise error.ParseError(err)
    return args
def getargsdict(x, funcname, keys):
    """Build the arguments of ``funcname`` via parser.buildargsdict()

    ``keys`` is the argument spec, split with parser.splitargspec().
    """
    argspec = parser.splitargspec(keys)
    return parser.buildargsdict(getlist(x), funcname, argspec,
                                keyvaluenode='keyvalue', keynode='symbol')
Yuya Nishihara
|
# cache of {spec: raw parsed tree} built internally
_treecache = {}
def _cachedtree(spec):
    """Return the raw parsed tree of ``spec``, parsing it on first use"""
    # thread safe because parse() is reentrant and dict.__setitem__() is atomic
    cached = _treecache.get(spec)
    if cached is not None:
        return cached
    parsed = parse(spec)
    _treecache[spec] = parsed
    return parsed
def _build(tmplspec, *repls):
    """Create raw parsed tree from a template revset statement

    Each ``_`` symbol of the (cached) template tree is the placeholder
    handed to parser.buildtree() together with ``repls``.

    >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
    ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
    """
    placeholder = ('symbol', '_')
    return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
Yuya Nishihara
|
def _match(patspec, tree):
    """Test if a tree matches the given pattern statement; return the matches

    The pattern is parsed once and cached; ``_`` is its placeholder symbol.

    >>> _match(b'f(_)', parse(b'f()'))
    >>> _match(b'f(_)', parse(b'f(1)'))
    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
    >>> _match(b'f(_)', parse(b'f(1, 2)'))
    """
    placeholder = ('symbol', '_')
    return parser.matchtree(_cachedtree(patspec), tree, placeholder,
                            {'keyvalue', 'list'})
Yuya Nishihara
|
def _matchonly(revs, bases):
    """Match ``revs and bases`` against 'ancestors(_) and not ancestors(_)'"""
    candidate = ('and', revs, bases)
    return _match('ancestors(_) and not ancestors(_)', candidate)
Yuya Nishihara
|
r31024 | |||
def _fixops(x): | ||||
"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be | ||||
handled well by our simple top-down parser""" | ||||
if not isinstance(x, tuple): | ||||
return x | ||||
op = x[0] | ||||
if op == 'parent': | ||||
# x^:y means (x^) : y, not x ^ (:y) | ||||
# x^: means (x^) :, not x ^ (:) | ||||
post = ('parentpost', x[1]) | ||||
if x[2][0] == 'dagrangepre': | ||||
return _fixops(('dagrange', post, x[2][1])) | ||||
Yuya Nishihara
|
r35556 | elif x[2][0] == 'dagrangeall': | ||
return _fixops(('dagrangepost', post)) | ||||
Yuya Nishihara
|
r31024 | elif x[2][0] == 'rangepre': | ||
return _fixops(('range', post, x[2][1])) | ||||
elif x[2][0] == 'rangeall': | ||||
return _fixops(('rangepost', post)) | ||||
elif op == 'or': | ||||
# make number of arguments deterministic: | ||||
# x + y + z -> (or x y z) -> (or (list x y z)) | ||||
return (op, _fixops(('list',) + x[1:])) | ||||
Yuya Nishihara
|
r33416 | elif op == 'subscript' and x[1][0] == 'relation': | ||
# x#y[z] ternary | ||||
return _fixops(('relsubscript', x[1][1], x[1][2], x[2])) | ||||
Yuya Nishihara
|
r31024 | |||
return (op,) + tuple(_fixops(y) for y in x[1:]) | ||||
Jun Wu
|
r34013 | def _analyze(x): | ||
Yuya Nishihara
|
r31024 | if x is None: | ||
return x | ||||
op = x[0] | ||||
if op == 'minus': | ||||
Yuya Nishihara
|
r34046 | return _analyze(_build('_ and not _', *x[1:])) | ||
Yuya Nishihara
|
r31024 | elif op == 'only': | ||
Yuya Nishihara
|
r34046 | return _analyze(_build('only(_, _)', *x[1:])) | ||
Yuya Nishihara
|
r31024 | elif op == 'onlypost': | ||
Yuya Nishihara
|
r34046 | return _analyze(_build('only(_)', x[1])) | ||
Yuya Nishihara
|
r35556 | elif op == 'dagrangeall': | ||
raise error.ParseError(_("can't use '::' in this context")) | ||||
Yuya Nishihara
|
r31024 | elif op == 'dagrangepre': | ||
Yuya Nishihara
|
r34046 | return _analyze(_build('ancestors(_)', x[1])) | ||
Yuya Nishihara
|
r31024 | elif op == 'dagrangepost': | ||
Yuya Nishihara
|
r34046 | return _analyze(_build('descendants(_)', x[1])) | ||
Yuya Nishihara
|
r31024 | elif op == 'negate': | ||
s = getstring(x[1], _("can't negate that")) | ||||
Jun Wu
|
r34013 | return _analyze(('string', '-' + s)) | ||
Yuya Nishihara
|
r31024 | elif op in ('string', 'symbol'): | ||
return x | ||||
elif op == 'rangeall': | ||||
Jun Wu
|
r34013 | return (op, None) | ||
elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}: | ||||
return (op, _analyze(x[1])) | ||||
Yuya Nishihara
|
r31024 | elif op == 'group': | ||
Jun Wu
|
r34013 | return _analyze(x[1]) | ||
elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation', | ||||
'subscript'}: | ||||
ta = _analyze(x[1]) | ||||
tb = _analyze(x[2]) | ||||
return (op, ta, tb) | ||||
Yuya Nishihara
|
r33416 | elif op == 'relsubscript': | ||
Jun Wu
|
r34013 | ta = _analyze(x[1]) | ||
tb = _analyze(x[2]) | ||||
tc = _analyze(x[3]) | ||||
return (op, ta, tb, tc) | ||||
Yuya Nishihara
|
r31024 | elif op == 'list': | ||
Jun Wu
|
r34013 | return (op,) + tuple(_analyze(y) for y in x[1:]) | ||
Yuya Nishihara
|
r31024 | elif op == 'keyvalue': | ||
Jun Wu
|
r34013 | return (op, x[1], _analyze(x[2])) | ||
Yuya Nishihara
|
r31024 | elif op == 'func': | ||
Boris Feld
|
r37778 | f = getsymbol(x[1]) | ||
if f == 'revset': | ||||
return _analyze(x[2]) | ||||
Jun Wu
|
r34013 | return (op, x[1], _analyze(x[2])) | ||
Yuya Nishihara
|
r31024 | raise ValueError('invalid operator %r' % op) | ||
Jun Wu
|
def analyze(x):
    """Transform raw parsed tree to evaluatable tree which can be fed to
    optimize() or getset()

    All pseudo operations should be mapped to real operations or functions
    defined in methods or symbols table respectively.
    """
    analyzed = _analyze(x)
    return analyzed
Yuya Nishihara
|
r31024 | |||
Jun Wu
|
r34273 | def _optimize(x): | ||
Yuya Nishihara
|
r31024 | if x is None: | ||
return 0, x | ||||
op = x[0] | ||||
if op in ('string', 'symbol'): | ||||
Jun Wu
|
r34273 | return 0.5, x # single revisions are small | ||
Yuya Nishihara
|
r31024 | elif op == 'and': | ||
Jun Wu
|
r34273 | wa, ta = _optimize(x[1]) | ||
wb, tb = _optimize(x[2]) | ||||
Yuya Nishihara
|
r31024 | w = min(wa, wb) | ||
Jun Wu
|
r34067 | # (draft/secret/_notpublic() & ::x) have a fast path | ||
m = _match('_() & ancestors(_)', ('and', ta, tb)) | ||||
if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}: | ||||
return w, _build('_phaseandancestors(_, _)', m[1], m[2]) | ||||
Yuya Nishihara
|
r31024 | # (::x and not ::y)/(not ::y and ::x) have a fast path | ||
Yuya Nishihara
|
r34046 | m = _matchonly(ta, tb) or _matchonly(tb, ta) | ||
if m: | ||||
return w, _build('only(_, _)', *m[1:]) | ||||
Yuya Nishihara
|
r31024 | |||
Yuya Nishihara
|
r34048 | m = _match('not _', tb) | ||
if m: | ||||
return wa, ('difference', ta, m[1]) | ||||
Yuya Nishihara
|
r31024 | if wa > wb: | ||
Jun Wu
|
r34022 | op = 'andsmally' | ||
Jun Wu
|
r34013 | return w, (op, ta, tb) | ||
Yuya Nishihara
|
r31024 | elif op == 'or': | ||
# fast path for machine-generated expression, that is likely to have | ||||
# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' | ||||
ws, ts, ss = [], [], [] | ||||
def flushss(): | ||||
if not ss: | ||||
return | ||||
if len(ss) == 1: | ||||
w, t = ss[0] | ||||
else: | ||||
s = '\0'.join(t[1] for w, t in ss) | ||||
Yuya Nishihara
|
r34046 | y = _build('_list(_)', ('string', s)) | ||
Jun Wu
|
r34273 | w, t = _optimize(y) | ||
Yuya Nishihara
|
r31024 | ws.append(w) | ||
ts.append(t) | ||||
del ss[:] | ||||
for y in getlist(x[1]): | ||||
Jun Wu
|
r34273 | w, t = _optimize(y) | ||
Yuya Nishihara
|
r31024 | if t is not None and (t[0] == 'string' or t[0] == 'symbol'): | ||
ss.append((w, t)) | ||||
continue | ||||
flushss() | ||||
ws.append(w) | ||||
ts.append(t) | ||||
flushss() | ||||
if len(ts) == 1: | ||||
return ws[0], ts[0] # 'or' operation is fully optimized out | ||||
Jun Wu
|
r34013 | return max(ws), (op, ('list',) + tuple(ts)) | ||
Yuya Nishihara
|
r31024 | elif op == 'not': | ||
# Optimize not public() to _notpublic() because we have a fast version | ||||
Yuya Nishihara
|
r34048 | if _match('public()', x[1]): | ||
Jun Wu
|
r34273 | o = _optimize(_build('_notpublic()')) | ||
Yuya Nishihara
|
r31024 | return o[0], o[1] | ||
else: | ||||
Jun Wu
|
r34273 | o = _optimize(x[1]) | ||
Jun Wu
|
r34013 | return o[0], (op, o[1]) | ||
Yuya Nishihara
|
r31024 | elif op == 'rangeall': | ||
Jun Wu
|
r34273 | return 1, x | ||
Yuya Nishihara
|
r31024 | elif op in ('rangepre', 'rangepost', 'parentpost'): | ||
Jun Wu
|
r34273 | o = _optimize(x[1]) | ||
Jun Wu
|
r34013 | return o[0], (op, o[1]) | ||
Yuya Nishihara
|
r33415 | elif op in ('dagrange', 'range'): | ||
Jun Wu
|
r34273 | wa, ta = _optimize(x[1]) | ||
wb, tb = _optimize(x[2]) | ||||
Jun Wu
|
r34013 | return wa + wb, (op, ta, tb) | ||
Yuya Nishihara
|
r33416 | elif op in ('parent', 'ancestor', 'relation', 'subscript'): | ||
Jun Wu
|
r34273 | w, t = _optimize(x[1]) | ||
Jun Wu
|
r34013 | return w, (op, t, x[2]) | ||
Yuya Nishihara
|
r33416 | elif op == 'relsubscript': | ||
Jun Wu
|
r34273 | w, t = _optimize(x[1]) | ||
Jun Wu
|
r34013 | return w, (op, t, x[2], x[3]) | ||
Yuya Nishihara
|
r31024 | elif op == 'list': | ||
Jun Wu
|
r34273 | ws, ts = zip(*(_optimize(y) for y in x[1:])) | ||
Yuya Nishihara
|
r31024 | return sum(ws), (op,) + ts | ||
elif op == 'keyvalue': | ||||
Jun Wu
|
r34273 | w, t = _optimize(x[2]) | ||
Yuya Nishihara
|
r31024 | return w, (op, x[1], t) | ||
elif op == 'func': | ||||
f = getsymbol(x[1]) | ||||
Jun Wu
|
r34273 | wa, ta = _optimize(x[2]) | ||
Jun Wu
|
r34274 | w = getattr(symbols.get(f), '_weight', 1) | ||
Sean Farley
|
r38644 | m = _match('commonancestors(_)', ta) | ||
# Optimize heads(commonancestors(_)) because we have a fast version | ||||
if f == 'heads' and m: | ||||
return w + wa, _build('_commonancestorheads(_)', m[1]) | ||||
Jun Wu
|
r34013 | return w + wa, (op, x[1], ta) | ||
Yuya Nishihara
|
r31024 | raise ValueError('invalid operator %r' % op) | ||
def optimize(tree):
    """Optimize evaluatable tree

    All pseudo operations should be transformed beforehand.
    """
    # the weight computed alongside the tree is not exposed to callers
    return _optimize(tree)[1]
# the set of valid characters for the initial letter of symbols in
# alias declarations and definitions: the defaults plus '$'
_aliassyminitletters = _syminitletters.union({'$'})
Yuya Nishihara
|
r31024 | |||
def _parsewith(spec, lookup=None, syminitletters=None):
    """Generate a parse tree of given spec with given tokenizing options

    ``lookup`` is dropped when ``spec`` has the form ``revset(...)``.
    Raises ParseError('invalid token') if the parser stops before the end
    of ``spec``.

    >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
    ('func', ('symbol', 'foo'), ('symbol', '$1'))
    >>> _parsewith(b'$1')
    Traceback (most recent call last):
      ...
    ParseError: ("syntax error in revset '$1'", 0)
    >>> _parsewith(b'foo bar')
    Traceback (most recent call last):
      ...
    ParseError: ('invalid token', 4)
    """
    if lookup and spec.startswith('revset(') and spec.endswith(')'):
        lookup = None
    tokens = tokenize(spec, lookup=lookup, syminitletters=syminitletters)
    tree, pos = parser.parser(elements).parse(tokens)
    if pos != len(spec):
        raise error.ParseError(_('invalid token'), pos)
    flattened = parser.simplifyinfixops(tree, ('list', 'or'))
    return _fixops(flattened)
class _aliasrules(parser.basealiasrules):
    """Parsing and expansion rule set of revset aliases"""
    _section = _('revset alias')

    @staticmethod
    def _parse(spec):
        """Parse alias declaration/definition ``spec``

        This allows symbol names to use also ``$`` as an initial letter
        (for backward compatibility), and callers of this function should
        examine whether ``$`` is used also for unexpected symbols or not.
        """
        return _parsewith(spec, syminitletters=_aliassyminitletters)

    @staticmethod
    def _trygetfunc(tree):
        """Return ``(name, args)`` if ``tree`` is a call of a symbol,
        otherwise None"""
        if tree[0] != 'func' or tree[1][0] != 'symbol':
            return None
        return tree[1][1], getlist(tree[2])
Jun Wu
|
def expandaliases(tree, aliases, warn=None):
    """Expand aliases in a tree, aliases is a list of (name, value) tuples

    If ``warn`` is given, it is called once for each alias that carries an
    error and has not been warned about yet.
    """
    aliases = _aliasrules.buildmap(aliases)
    tree = _aliasrules.expand(aliases, tree)
    if warn is None:
        return tree
    # warn about problematic (but not referred) aliases
    for name, alias in sorted(aliases.iteritems()):
        if not alias.error or alias.warned:
            continue
        warn(_('warning: %s\n') % (alias.error))
        alias.warned = True
    return tree
def foldconcat(tree):
    """Fold elements to be concatenated by `##`

    Every ``'_concat'`` node is replaced by one ``'string'`` node holding
    the joined text of its string/symbol operands. Raises ParseError if an
    operand is of any other type.
    """
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return tree
    if tree[0] != '_concat':
        return tuple(foldconcat(t) for t in tree)

    # depth-first, left-to-right walk over nested '_concat' nodes
    stack = [tree]
    pieces = []
    while stack:
        node = stack.pop()
        kind = node[0]
        if kind == '_concat':
            stack.extend(reversed(node[1:]))
        elif kind in ('string', 'symbol'):
            pieces.append(node[1])
        else:
            msg = _("\"##\" can't concatenate \"%s\" element") % (kind)
            raise error.ParseError(msg)
    return ('string', ''.join(pieces))
def parse(spec, lookup=None):
    """Parse ``spec`` into a raw tree

    When a ParseError carries a location, a hint is attached that shows
    the statement with a caret under that location; the error is then
    re-raised.
    """
    try:
        return _parsewith(spec, lookup=lookup)
    except error.ParseError as inst:
        haslocation = len(inst.args) > 1
        if haslocation:
            loc = inst.args[1]
            # Remove newlines -- spaces are equivalent whitespace.
            flat = spec.replace('\n', ' ')
            # We want the caret to point to the place in the template that
            # failed to parse, but in a hint we get a open paren at the
            # start. Therefore, we print "loc + 1" spaces (instead of "loc")
            # to line up the caret with the location of the error.
            caret = ' ' * (loc + 1) + '^ ' + _('here')
            inst.hint = flat + '\n' + caret
        raise
Yuya Nishihara
|
r31024 | |||
Augie Fackler
|
def _quote(s):
    r"""Quote a value in order to make it safe for the revset engine.

    The value is converted with pycompat.bytestr(), escaped with
    stringutil.escapestr() and wrapped in single quotes.

    >>> _quote(b'asdf')
    "'asdf'"
    >>> _quote(b"asdf'\"")
    '\'asdf\\\'"\''
    >>> _quote(b'asdf\'')
    "'asdf\\''"
    >>> _quote(1)
    "'1'"
    """
    escaped = stringutil.escapestr(pycompat.bytestr(s))
    return "'%s'" % escaped
Augie Fackler
|
r31604 | |||
Yuya Nishihara
|
r35614 | def _formatargtype(c, arg): | ||
if c == 'd': | ||||
return '%d' % int(arg) | ||||
elif c == 's': | ||||
return _quote(arg) | ||||
elif c == 'r': | ||||
Yuya Nishihara
|
r37793 | if not isinstance(arg, bytes): | ||
raise TypeError | ||||
Yuya Nishihara
|
r35614 | parse(arg) # make sure syntax errors are confined | ||
return '(%s)' % arg | ||||
elif c == 'n': | ||||
return _quote(node.hex(arg)) | ||||
elif c == 'b': | ||||
try: | ||||
return _quote(arg.branch()) | ||||
except AttributeError: | ||||
raise TypeError | ||||
raise error.ParseError(_('unexpected revspec format character %s') % c) | ||||
def _formatlistexp(s, t): | ||||
l = len(s) | ||||
if l == 0: | ||||
return "_list('')" | ||||
elif l == 1: | ||||
return _formatargtype(t, s[0]) | ||||
elif t == 'd': | ||||
return "_intlist('%s')" % "\0".join('%d' % int(a) for a in s) | ||||
elif t == 's': | ||||
return "_list(%s)" % _quote("\0".join(s)) | ||||
elif t == 'n': | ||||
return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s) | ||||
elif t == 'b': | ||||
try: | ||||
return "_list('%s')" % "\0".join(a.branch() for a in s) | ||||
except AttributeError: | ||||
raise TypeError | ||||
m = l // 2 | ||||
return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t)) | ||||
Yuya Nishihara
|
r35615 | def _formatparamexp(args, t): | ||
return ', '.join(_formatargtype(t, a) for a in args) | ||||
# formatspec() prefix character -> function formatting a list argument
_formatlistfuncs = {
    'l': _formatlistexp,   # list expression
    'p': _formatparamexp,  # list of function parameters
}
Yuya Nishihara
|
def formatspec(expr, *args):
    '''
    This is a convenience function for using revsets internally, and
    escapes arguments appropriately. Aliases are intentionally ignored
    so that intended expression behavior isn't accidentally subverted.

    Supported arguments:

    %r = revset expression, parenthesized
    %d = int(arg), no quoting
    %s = string(arg), escaped and single-quoted
    %b = arg.branch(), escaped and single-quoted
    %n = hex(arg), single-quoted
    %% = a literal '%'

    Prefixing the type with 'l' specifies a parenthesized list of that type,
    and 'p' specifies a list of function parameters of that type.

    ParseError is raised for an incomplete format character, a missing,
    invalid or surplus argument.

    >>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
    '(10 or 11):: and ((this()) or (that()))'
    >>> formatspec(b'%d:: and not %d::', 10, 20)
    '10:: and not 20::'
    >>> formatspec(b'%ld or %ld', [], [1])
    "_list('') or 1"
    >>> formatspec(b'keyword(%s)', b'foo\\xe9')
    "keyword('foo\\\\xe9')"
    >>> b = lambda: b'default'
    >>> b.branch = b
    >>> formatspec(b'branch(%b)', b)
    "branch('default')"
    >>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
    "root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
    >>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
    "sort((:), 'desc', 'user')"
    >>> formatspec(b'%ls', [b'a', b"'"])
    "_list('a\\\\x00\\\\'')"
    '''
    expr = pycompat.bytestr(expr)
    argiter = iter(args)
    ret = []
    pos = 0
    end = len(expr)
    while pos < end:
        q = expr.find('%', pos)
        if q < 0:
            # no more format characters: copy the tail verbatim
            ret.append(expr[pos:])
            break
        ret.append(expr[pos:q])
        pos = q + 1
        try:
            d = expr[pos]
        except IndexError:
            raise error.ParseError(_('incomplete revspec format character'))
        if d == '%':
            ret.append(d)
            pos += 1
            continue

        try:
            arg = next(argiter)
        except StopIteration:
            raise error.ParseError(_('missing argument for revspec'))

        listfunc = _formatlistfuncs.get(d)
        if listfunc:
            # a list of some type
            pos += 1
            try:
                d = expr[pos]
            except IndexError:
                raise error.ParseError(
                    _('incomplete revspec format character'))
        try:
            if listfunc:
                ret.append(listfunc(list(arg), d))
            else:
                ret.append(_formatargtype(d, arg))
        except (TypeError, ValueError):
            raise error.ParseError(_('invalid argument for revspec'))
        pos += 1

    # every argument must have been consumed by a format character
    try:
        next(argiter)
    except StopIteration:
        pass
    else:
        raise error.ParseError(_('too many revspec arguments specified'))
    return ''.join(ret)
Yuya Nishihara
|
r31024 | |||
def prettyformat(tree):
    """Return parser.prettyformat() of ``tree`` with 'string' and 'symbol'
    passed as its second argument"""
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)
def depth(tree):
    """Return the nesting depth of ``tree``; a non-tuple has depth 0"""
    if not isinstance(tree, tuple):
        return 0
    return 1 + max(depth(child) for child in tree)
def funcsused(tree):
    """Return the set of function names called anywhere in ``tree``"""
    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
        return set()
    found = set()
    for child in tree[1:]:
        found.update(funcsused(child))
    if tree[0] == 'func':
        found.add(tree[1][1])
    return found
Pulkit Goyal
|
r35510 | |||
# 1 to 40 hexadecimal digits followed by the end of the text
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
def _ishashlikesymbol(symbol):
    """returns true if the symbol looks like a hash

    The value returned is the result of ``_hashre.match(symbol)``.
    """
    matched = _hashre.match(symbol)
    return matched
def gethashlikesymbols(tree):
    """returns the list of symbols of the tree that look like hashes

    Every operand of every operator is visited, including operators that
    take a single operand; 'string' nodes are never reported.

    >>> gethashlikesymbols(parse(b'3::abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'precursors(.)'))
    []
    >>> gethashlikesymbols(parse(b'precursors(34)'))
    ['34']
    >>> gethashlikesymbols(parse(b'abe3ffZ'))
    []
    >>> gethashlikesymbols(parse(b'not abe3ff'))
    ['abe3ff']
    >>> gethashlikesymbols(parse(b'3 + abe3ff'))
    ['3', 'abe3ff']
    >>> gethashlikesymbols(parse(b'"abe3ff"'))
    []
    """
    if not tree or not isinstance(tree, tuple):
        return []

    op = tree[0]
    if op == "symbol":
        if _ishashlikesymbol(tree[1]):
            return [tree[1]]
        return []
    if op == "string":
        # quoted text is a literal, not a revision identifier
        return []

    # Recurse into all operands. Checking the node length instead would
    # skip one-operand nodes such as ('not', x), ('dagrangepost', x) and
    # the ('or', ('list', ...)) form produced by _fixops().
    results = []
    for subtree in tree[1:]:
        results += gethashlikesymbols(subtree)
    return results