|
|
# revsetlang.py - parser, tokenizer and utility for revision set language
|
|
|
#
|
|
|
# Copyright 2010 Matt Mackall <mpm@selenic.com>
|
|
|
#
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
|
import string
|
|
|
|
|
|
from .i18n import _
|
|
|
from . import (
|
|
|
error,
|
|
|
node,
|
|
|
parser,
|
|
|
pycompat,
|
|
|
smartset,
|
|
|
util,
|
|
|
)
|
|
|
from .utils import (
|
|
|
stringutil,
|
|
|
)
|
|
|
|
|
|
elements = {
|
|
|
# token-type: binding-strength, primary, prefix, infix, suffix
|
|
|
"(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
|
|
|
"[": (21, None, None, ("subscript", 1, "]"), None),
|
|
|
"#": (21, None, None, ("relation", 21), None),
|
|
|
"##": (20, None, None, ("_concat", 20), None),
|
|
|
"~": (18, None, None, ("ancestor", 18), None),
|
|
|
"^": (18, None, None, ("parent", 18), "parentpost"),
|
|
|
"-": (5, None, ("negate", 19), ("minus", 5), None),
|
|
|
"::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
|
|
|
"dagrangepost"),
|
|
|
"..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
|
|
|
"dagrangepost"),
|
|
|
":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
|
|
|
"not": (10, None, ("not", 10), None, None),
|
|
|
"!": (10, None, ("not", 10), None, None),
|
|
|
"and": (5, None, None, ("and", 5), None),
|
|
|
"&": (5, None, None, ("and", 5), None),
|
|
|
"%": (5, None, None, ("only", 5), "onlypost"),
|
|
|
"or": (4, None, None, ("or", 4), None),
|
|
|
"|": (4, None, None, ("or", 4), None),
|
|
|
"+": (4, None, None, ("or", 4), None),
|
|
|
"=": (3, None, None, ("keyvalue", 3), None),
|
|
|
",": (2, None, None, ("list", 2), None),
|
|
|
")": (0, None, None, None, None),
|
|
|
"]": (0, None, None, None, None),
|
|
|
"symbol": (0, "symbol", None, None, None),
|
|
|
"string": (0, "string", None, None, None),
|
|
|
"end": (0, None, None, None, None),
|
|
|
}
|
|
|
|
|
|
keywords = {'and', 'or', 'not'}
|
|
|
|
|
|
symbols = {}
|
|
|
|
|
|
_quoteletters = {'"', "'"}
|
|
|
_simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
|
|
|
|
|
|
# default set of valid characters for the initial letter of symbols
|
|
|
_syminitletters = set(pycompat.iterbytestr(
|
|
|
string.ascii_letters.encode('ascii') +
|
|
|
string.digits.encode('ascii') +
|
|
|
'._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
|
|
|
|
|
|
# default set of valid characters for non-initial letters of symbols
|
|
|
_symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
|
|
|
|
|
|
def tokenize(program, lookup=None, syminitletters=None, symletters=None):
|
|
|
'''
|
|
|
Parse a revset statement into a stream of tokens
|
|
|
|
|
|
``syminitletters`` is the set of valid characters for the initial
|
|
|
letter of symbols.
|
|
|
|
|
|
By default, character ``c`` is recognized as valid for initial
|
|
|
letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
|
|
|
|
|
|
``symletters`` is the set of valid characters for non-initial
|
|
|
letters of symbols.
|
|
|
|
|
|
By default, character ``c`` is recognized as valid for non-initial
|
|
|
letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
|
|
|
|
|
|
Check that @ is a valid unquoted token character (issue3686):
|
|
|
>>> list(tokenize(b"@::"))
|
|
|
[('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
|
|
|
|
|
|
'''
|
|
|
if not isinstance(program, bytes):
|
|
|
raise error.ProgrammingError('revset statement must be bytes, got %r'
|
|
|
% program)
|
|
|
program = pycompat.bytestr(program)
|
|
|
if syminitletters is None:
|
|
|
syminitletters = _syminitletters
|
|
|
if symletters is None:
|
|
|
symletters = _symletters
|
|
|
|
|
|
if program and lookup:
|
|
|
# attempt to parse old-style ranges first to deal with
|
|
|
# things like old-tag which contain query metacharacters
|
|
|
parts = program.split(':', 1)
|
|
|
if all(lookup(sym) for sym in parts if sym):
|
|
|
if parts[0]:
|
|
|
yield ('symbol', parts[0], 0)
|
|
|
if len(parts) > 1:
|
|
|
s = len(parts[0])
|
|
|
yield (':', None, s)
|
|
|
if parts[1]:
|
|
|
yield ('symbol', parts[1], s + 1)
|
|
|
yield ('end', None, len(program))
|
|
|
return
|
|
|
|
|
|
pos, l = 0, len(program)
|
|
|
while pos < l:
|
|
|
c = program[pos]
|
|
|
if c.isspace(): # skip inter-token whitespace
|
|
|
pass
|
|
|
elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
|
|
|
yield ('::', None, pos)
|
|
|
pos += 1 # skip ahead
|
|
|
elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
|
|
|
yield ('..', None, pos)
|
|
|
pos += 1 # skip ahead
|
|
|
elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
|
|
|
yield ('##', None, pos)
|
|
|
pos += 1 # skip ahead
|
|
|
elif c in _simpleopletters: # handle simple operators
|
|
|
yield (c, None, pos)
|
|
|
elif (c in _quoteletters or c == 'r' and
|
|
|
program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
|
|
|
if c == 'r':
|
|
|
pos += 1
|
|
|
c = program[pos]
|
|
|
decode = lambda x: x
|
|
|
else:
|
|
|
decode = parser.unescapestr
|
|
|
pos += 1
|
|
|
s = pos
|
|
|
while pos < l: # find closing quote
|
|
|
d = program[pos]
|
|
|
if d == '\\': # skip over escaped characters
|
|
|
pos += 2
|
|
|
continue
|
|
|
if d == c:
|
|
|
yield ('string', decode(program[s:pos]), s)
|
|
|
break
|
|
|
pos += 1
|
|
|
else:
|
|
|
raise error.ParseError(_("unterminated string"), s)
|
|
|
# gather up a symbol/keyword
|
|
|
elif c in syminitletters:
|
|
|
s = pos
|
|
|
pos += 1
|
|
|
while pos < l: # find end of symbol
|
|
|
d = program[pos]
|
|
|
if d not in symletters:
|
|
|
break
|
|
|
if d == '.' and program[pos - 1] == '.': # special case for ..
|
|
|
pos -= 1
|
|
|
break
|
|
|
pos += 1
|
|
|
sym = program[s:pos]
|
|
|
if sym in keywords: # operator keywords
|
|
|
yield (sym, None, s)
|
|
|
elif '-' in sym:
|
|
|
# some jerk gave us foo-bar-baz, try to check if it's a symbol
|
|
|
if lookup and lookup(sym):
|
|
|
# looks like a real symbol
|
|
|
yield ('symbol', sym, s)
|
|
|
else:
|
|
|
# looks like an expression
|
|
|
parts = sym.split('-')
|
|
|
for p in parts[:-1]:
|
|
|
if p: # possible consecutive -
|
|
|
yield ('symbol', p, s)
|
|
|
s += len(p)
|
|
|
yield ('-', None, s)
|
|
|
s += 1
|
|
|
if parts[-1]: # possible trailing -
|
|
|
yield ('symbol', parts[-1], s)
|
|
|
else:
|
|
|
yield ('symbol', sym, s)
|
|
|
pos -= 1
|
|
|
else:
|
|
|
raise error.ParseError(_("syntax error in revset '%s'") %
|
|
|
program, pos)
|
|
|
pos += 1
|
|
|
yield ('end', None, pos)
|
|
|
|
|
|
# helpers
|
|
|
|
|
|
_notset = object()
|
|
|
|
|
|
def getsymbol(x):
|
|
|
if x and x[0] == 'symbol':
|
|
|
return x[1]
|
|
|
raise error.ParseError(_('not a symbol'))
|
|
|
|
|
|
def getstring(x, err):
|
|
|
if x and (x[0] == 'string' or x[0] == 'symbol'):
|
|
|
return x[1]
|
|
|
raise error.ParseError(err)
|
|
|
|
|
|
def getinteger(x, err, default=_notset):
|
|
|
if not x and default is not _notset:
|
|
|
return default
|
|
|
try:
|
|
|
return int(getstring(x, err))
|
|
|
except ValueError:
|
|
|
raise error.ParseError(err)
|
|
|
|
|
|
def getboolean(x, err):
|
|
|
value = stringutil.parsebool(getsymbol(x))
|
|
|
if value is not None:
|
|
|
return value
|
|
|
raise error.ParseError(err)
|
|
|
|
|
|
def getlist(x):
|
|
|
if not x:
|
|
|
return []
|
|
|
if x[0] == 'list':
|
|
|
return list(x[1:])
|
|
|
return [x]
|
|
|
|
|
|
def getrange(x, err):
|
|
|
if not x:
|
|
|
raise error.ParseError(err)
|
|
|
op = x[0]
|
|
|
if op == 'range':
|
|
|
return x[1], x[2]
|
|
|
elif op == 'rangepre':
|
|
|
return None, x[1]
|
|
|
elif op == 'rangepost':
|
|
|
return x[1], None
|
|
|
elif op == 'rangeall':
|
|
|
return None, None
|
|
|
raise error.ParseError(err)
|
|
|
|
|
|
def getargs(x, min, max, err):
|
|
|
l = getlist(x)
|
|
|
if len(l) < min or (max >= 0 and len(l) > max):
|
|
|
raise error.ParseError(err)
|
|
|
return l
|
|
|
|
|
|
def getargsdict(x, funcname, keys):
|
|
|
return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
|
|
|
keyvaluenode='keyvalue', keynode='symbol')
|
|
|
|
|
|
# cache of {spec: raw parsed tree} built internally
|
|
|
_treecache = {}
|
|
|
|
|
|
def _cachedtree(spec):
|
|
|
# thread safe because parse() is reentrant and dict.__setitem__() is atomic
|
|
|
tree = _treecache.get(spec)
|
|
|
if tree is None:
|
|
|
_treecache[spec] = tree = parse(spec)
|
|
|
return tree
|
|
|
|
|
|
def _build(tmplspec, *repls):
|
|
|
"""Create raw parsed tree from a template revset statement
|
|
|
|
|
|
>>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
|
|
|
('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
|
|
|
"""
|
|
|
template = _cachedtree(tmplspec)
|
|
|
return parser.buildtree(template, ('symbol', '_'), *repls)
|
|
|
|
|
|
def _match(patspec, tree):
|
|
|
"""Test if a tree matches the given pattern statement; return the matches
|
|
|
|
|
|
>>> _match(b'f(_)', parse(b'f()'))
|
|
|
>>> _match(b'f(_)', parse(b'f(1)'))
|
|
|
[('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
|
|
|
>>> _match(b'f(_)', parse(b'f(1, 2)'))
|
|
|
"""
|
|
|
pattern = _cachedtree(patspec)
|
|
|
return parser.matchtree(pattern, tree, ('symbol', '_'),
|
|
|
{'keyvalue', 'list'})
|
|
|
|
|
|
def _matchonly(revs, bases):
|
|
|
return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
|
|
|
|
|
|
def _fixops(x):
|
|
|
"""Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
|
|
|
handled well by our simple top-down parser"""
|
|
|
if not isinstance(x, tuple):
|
|
|
return x
|
|
|
|
|
|
op = x[0]
|
|
|
if op == 'parent':
|
|
|
# x^:y means (x^) : y, not x ^ (:y)
|
|
|
# x^: means (x^) :, not x ^ (:)
|
|
|
post = ('parentpost', x[1])
|
|
|
if x[2][0] == 'dagrangepre':
|
|
|
return _fixops(('dagrange', post, x[2][1]))
|
|
|
elif x[2][0] == 'dagrangeall':
|
|
|
return _fixops(('dagrangepost', post))
|
|
|
elif x[2][0] == 'rangepre':
|
|
|
return _fixops(('range', post, x[2][1]))
|
|
|
elif x[2][0] == 'rangeall':
|
|
|
return _fixops(('rangepost', post))
|
|
|
elif op == 'or':
|
|
|
# make number of arguments deterministic:
|
|
|
# x + y + z -> (or x y z) -> (or (list x y z))
|
|
|
return (op, _fixops(('list',) + x[1:]))
|
|
|
elif op == 'subscript' and x[1][0] == 'relation':
|
|
|
# x#y[z] ternary
|
|
|
return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
|
|
|
|
|
|
return (op,) + tuple(_fixops(y) for y in x[1:])
|
|
|
|
|
|
def _analyze(x):
|
|
|
if x is None:
|
|
|
return x
|
|
|
|
|
|
op = x[0]
|
|
|
if op == 'minus':
|
|
|
return _analyze(_build('_ and not _', *x[1:]))
|
|
|
elif op == 'only':
|
|
|
return _analyze(_build('only(_, _)', *x[1:]))
|
|
|
elif op == 'onlypost':
|
|
|
return _analyze(_build('only(_)', x[1]))
|
|
|
elif op == 'dagrangeall':
|
|
|
raise error.ParseError(_("can't use '::' in this context"))
|
|
|
elif op == 'dagrangepre':
|
|
|
return _analyze(_build('ancestors(_)', x[1]))
|
|
|
elif op == 'dagrangepost':
|
|
|
return _analyze(_build('descendants(_)', x[1]))
|
|
|
elif op == 'negate':
|
|
|
s = getstring(x[1], _("can't negate that"))
|
|
|
return _analyze(('string', '-' + s))
|
|
|
elif op in ('string', 'symbol', 'smartset'):
|
|
|
return x
|
|
|
elif op == 'rangeall':
|
|
|
return (op, None)
|
|
|
elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
|
|
|
return (op, _analyze(x[1]))
|
|
|
elif op == 'group':
|
|
|
return _analyze(x[1])
|
|
|
elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
|
|
|
'subscript'}:
|
|
|
ta = _analyze(x[1])
|
|
|
tb = _analyze(x[2])
|
|
|
return (op, ta, tb)
|
|
|
elif op == 'relsubscript':
|
|
|
ta = _analyze(x[1])
|
|
|
tb = _analyze(x[2])
|
|
|
tc = _analyze(x[3])
|
|
|
return (op, ta, tb, tc)
|
|
|
elif op == 'list':
|
|
|
return (op,) + tuple(_analyze(y) for y in x[1:])
|
|
|
elif op == 'keyvalue':
|
|
|
return (op, x[1], _analyze(x[2]))
|
|
|
elif op == 'func':
|
|
|
return (op, x[1], _analyze(x[2]))
|
|
|
raise ValueError('invalid operator %r' % op)
|
|
|
|
|
|
def analyze(x):
|
|
|
"""Transform raw parsed tree to evaluatable tree which can be fed to
|
|
|
optimize() or getset()
|
|
|
|
|
|
All pseudo operations should be mapped to real operations or functions
|
|
|
defined in methods or symbols table respectively.
|
|
|
"""
|
|
|
return _analyze(x)
|
|
|
|
|
|
def _optimize(x):
|
|
|
if x is None:
|
|
|
return 0, x
|
|
|
|
|
|
op = x[0]
|
|
|
if op in ('string', 'symbol', 'smartset'):
|
|
|
return 0.5, x # single revisions are small
|
|
|
elif op == 'and':
|
|
|
wa, ta = _optimize(x[1])
|
|
|
wb, tb = _optimize(x[2])
|
|
|
w = min(wa, wb)
|
|
|
|
|
|
# (draft/secret/_notpublic() & ::x) have a fast path
|
|
|
m = _match('_() & ancestors(_)', ('and', ta, tb))
|
|
|
if m and getsymbol(m[1]) in {'draft', 'secret', '_notpublic'}:
|
|
|
return w, _build('_phaseandancestors(_, _)', m[1], m[2])
|
|
|
|
|
|
# (::x and not ::y)/(not ::y and ::x) have a fast path
|
|
|
m = _matchonly(ta, tb) or _matchonly(tb, ta)
|
|
|
if m:
|
|
|
return w, _build('only(_, _)', *m[1:])
|
|
|
|
|
|
m = _match('not _', tb)
|
|
|
if m:
|
|
|
return wa, ('difference', ta, m[1])
|
|
|
if wa > wb:
|
|
|
op = 'andsmally'
|
|
|
return w, (op, ta, tb)
|
|
|
elif op == 'or':
|
|
|
# fast path for machine-generated expression, that is likely to have
|
|
|
# lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
|
|
|
ws, ts, ss = [], [], []
|
|
|
def flushss():
|
|
|
if not ss:
|
|
|
return
|
|
|
if len(ss) == 1:
|
|
|
w, t = ss[0]
|
|
|
else:
|
|
|
s = '\0'.join(t[1] for w, t in ss)
|
|
|
y = _build('_list(_)', ('string', s))
|
|
|
w, t = _optimize(y)
|
|
|
ws.append(w)
|
|
|
ts.append(t)
|
|
|
del ss[:]
|
|
|
for y in getlist(x[1]):
|
|
|
w, t = _optimize(y)
|
|
|
if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
|
|
|
ss.append((w, t))
|
|
|
continue
|
|
|
flushss()
|
|
|
ws.append(w)
|
|
|
ts.append(t)
|
|
|
flushss()
|
|
|
if len(ts) == 1:
|
|
|
return ws[0], ts[0] # 'or' operation is fully optimized out
|
|
|
return max(ws), (op, ('list',) + tuple(ts))
|
|
|
elif op == 'not':
|
|
|
# Optimize not public() to _notpublic() because we have a fast version
|
|
|
if _match('public()', x[1]):
|
|
|
o = _optimize(_build('_notpublic()'))
|
|
|
return o[0], o[1]
|
|
|
else:
|
|
|
o = _optimize(x[1])
|
|
|
return o[0], (op, o[1])
|
|
|
elif op == 'rangeall':
|
|
|
return 1, x
|
|
|
elif op in ('rangepre', 'rangepost', 'parentpost'):
|
|
|
o = _optimize(x[1])
|
|
|
return o[0], (op, o[1])
|
|
|
elif op in ('dagrange', 'range'):
|
|
|
wa, ta = _optimize(x[1])
|
|
|
wb, tb = _optimize(x[2])
|
|
|
return wa + wb, (op, ta, tb)
|
|
|
elif op in ('parent', 'ancestor', 'relation', 'subscript'):
|
|
|
w, t = _optimize(x[1])
|
|
|
return w, (op, t, x[2])
|
|
|
elif op == 'relsubscript':
|
|
|
w, t = _optimize(x[1])
|
|
|
return w, (op, t, x[2], x[3])
|
|
|
elif op == 'list':
|
|
|
ws, ts = zip(*(_optimize(y) for y in x[1:]))
|
|
|
return sum(ws), (op,) + ts
|
|
|
elif op == 'keyvalue':
|
|
|
w, t = _optimize(x[2])
|
|
|
return w, (op, x[1], t)
|
|
|
elif op == 'func':
|
|
|
f = getsymbol(x[1])
|
|
|
wa, ta = _optimize(x[2])
|
|
|
w = getattr(symbols.get(f), '_weight', 1)
|
|
|
m = _match('commonancestors(_)', ta)
|
|
|
|
|
|
# Optimize heads(commonancestors(_)) because we have a fast version
|
|
|
if f == 'heads' and m:
|
|
|
return w + wa, _build('_commonancestorheads(_)', m[1])
|
|
|
|
|
|
return w + wa, (op, x[1], ta)
|
|
|
raise ValueError('invalid operator %r' % op)
|
|
|
|
|
|
def optimize(tree):
|
|
|
"""Optimize evaluatable tree
|
|
|
|
|
|
All pseudo operations should be transformed beforehand.
|
|
|
"""
|
|
|
_weight, newtree = _optimize(tree)
|
|
|
return newtree
|
|
|
|
|
|
# the set of valid characters for the initial letter of symbols in
|
|
|
# alias declarations and definitions
|
|
|
_aliassyminitletters = _syminitletters | {'$'}
|
|
|
|
|
|
def _parsewith(spec, lookup=None, syminitletters=None):
|
|
|
"""Generate a parse tree of given spec with given tokenizing options
|
|
|
|
|
|
>>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
|
|
|
('func', ('symbol', 'foo'), ('symbol', '$1'))
|
|
|
>>> _parsewith(b'$1')
|
|
|
Traceback (most recent call last):
|
|
|
...
|
|
|
ParseError: ("syntax error in revset '$1'", 0)
|
|
|
>>> _parsewith(b'foo bar')
|
|
|
Traceback (most recent call last):
|
|
|
...
|
|
|
ParseError: ('invalid token', 4)
|
|
|
"""
|
|
|
if lookup and spec.startswith('revset(') and spec.endswith(')'):
|
|
|
lookup = None
|
|
|
p = parser.parser(elements)
|
|
|
tree, pos = p.parse(tokenize(spec, lookup=lookup,
|
|
|
syminitletters=syminitletters))
|
|
|
if pos != len(spec):
|
|
|
raise error.ParseError(_('invalid token'), pos)
|
|
|
return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
|
|
|
|
|
|
class _aliasrules(parser.basealiasrules):
|
|
|
"""Parsing and expansion rule set of revset aliases"""
|
|
|
_section = _('revset alias')
|
|
|
|
|
|
@staticmethod
|
|
|
def _parse(spec):
|
|
|
"""Parse alias declaration/definition ``spec``
|
|
|
|
|
|
This allows symbol names to use also ``$`` as an initial letter
|
|
|
(for backward compatibility), and callers of this function should
|
|
|
examine whether ``$`` is used also for unexpected symbols or not.
|
|
|
"""
|
|
|
return _parsewith(spec, syminitletters=_aliassyminitletters)
|
|
|
|
|
|
@staticmethod
|
|
|
def _trygetfunc(tree):
|
|
|
if tree[0] == 'func' and tree[1][0] == 'symbol':
|
|
|
return tree[1][1], getlist(tree[2])
|
|
|
|
|
|
def expandaliases(tree, aliases, warn=None):
|
|
|
"""Expand aliases in a tree, aliases is a list of (name, value) tuples"""
|
|
|
aliases = _aliasrules.buildmap(aliases)
|
|
|
tree = _aliasrules.expand(aliases, tree)
|
|
|
# warn about problematic (but not referred) aliases
|
|
|
if warn is not None:
|
|
|
for name, alias in sorted(aliases.iteritems()):
|
|
|
if alias.error and not alias.warned:
|
|
|
warn(_('warning: %s\n') % (alias.error))
|
|
|
alias.warned = True
|
|
|
return tree
|
|
|
|
|
|
def foldconcat(tree):
|
|
|
"""Fold elements to be concatenated by `##`
|
|
|
"""
|
|
|
if (not isinstance(tree, tuple)
|
|
|
or tree[0] in ('string', 'symbol', 'smartset')):
|
|
|
return tree
|
|
|
if tree[0] == '_concat':
|
|
|
pending = [tree]
|
|
|
l = []
|
|
|
while pending:
|
|
|
e = pending.pop()
|
|
|
if e[0] == '_concat':
|
|
|
pending.extend(reversed(e[1:]))
|
|
|
elif e[0] in ('string', 'symbol'):
|
|
|
l.append(e[1])
|
|
|
else:
|
|
|
msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
|
|
|
raise error.ParseError(msg)
|
|
|
return ('string', ''.join(l))
|
|
|
else:
|
|
|
return tuple(foldconcat(t) for t in tree)
|
|
|
|
|
|
def parse(spec, lookup=None):
|
|
|
try:
|
|
|
return _parsewith(spec, lookup=lookup)
|
|
|
except error.ParseError as inst:
|
|
|
if len(inst.args) > 1: # has location
|
|
|
loc = inst.args[1]
|
|
|
# Remove newlines -- spaces are equivalent whitespace.
|
|
|
spec = spec.replace('\n', ' ')
|
|
|
# We want the caret to point to the place in the template that
|
|
|
# failed to parse, but in a hint we get a open paren at the
|
|
|
# start. Therefore, we print "loc + 1" spaces (instead of "loc")
|
|
|
# to line up the caret with the location of the error.
|
|
|
inst.hint = spec + '\n' + ' ' * (loc + 1) + '^ ' + _('here')
|
|
|
raise
|
|
|
|
|
|
def _quote(s):
|
|
|
r"""Quote a value in order to make it safe for the revset engine.
|
|
|
|
|
|
>>> _quote(b'asdf')
|
|
|
"'asdf'"
|
|
|
>>> _quote(b"asdf'\"")
|
|
|
'\'asdf\\\'"\''
|
|
|
>>> _quote(b'asdf\'')
|
|
|
"'asdf\\''"
|
|
|
>>> _quote(1)
|
|
|
"'1'"
|
|
|
"""
|
|
|
return "'%s'" % stringutil.escapestr(pycompat.bytestr(s))
|
|
|
|
|
|
def _formatargtype(c, arg):
|
|
|
if c == 'd':
|
|
|
return '_rev(%d)' % int(arg)
|
|
|
elif c == 's':
|
|
|
return _quote(arg)
|
|
|
elif c == 'r':
|
|
|
if not isinstance(arg, bytes):
|
|
|
raise TypeError
|
|
|
parse(arg) # make sure syntax errors are confined
|
|
|
return '(%s)' % arg
|
|
|
elif c == 'n':
|
|
|
return _quote(node.hex(arg))
|
|
|
elif c == 'b':
|
|
|
try:
|
|
|
return _quote(arg.branch())
|
|
|
except AttributeError:
|
|
|
raise TypeError
|
|
|
raise error.ParseError(_('unexpected revspec format character %s') % c)
|
|
|
|
|
|
def _formatlistexp(s, t):
|
|
|
l = len(s)
|
|
|
if l == 0:
|
|
|
return "_list('')"
|
|
|
elif l == 1:
|
|
|
return _formatargtype(t, s[0])
|
|
|
elif t == 'd':
|
|
|
return _formatintlist(s)
|
|
|
elif t == 's':
|
|
|
return "_list(%s)" % _quote("\0".join(s))
|
|
|
elif t == 'n':
|
|
|
return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
|
|
|
elif t == 'b':
|
|
|
try:
|
|
|
return "_list('%s')" % "\0".join(a.branch() for a in s)
|
|
|
except AttributeError:
|
|
|
raise TypeError
|
|
|
|
|
|
m = l // 2
|
|
|
return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
|
|
|
|
|
|
def _formatintlist(data):
|
|
|
try:
|
|
|
l = len(data)
|
|
|
if l == 0:
|
|
|
return "_list('')"
|
|
|
elif l == 1:
|
|
|
return _formatargtype('d', data[0])
|
|
|
return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
|
|
|
except (TypeError, ValueError):
|
|
|
raise error.ParseError(_('invalid argument for revspec'))
|
|
|
|
|
|
def _formatparamexp(args, t):
|
|
|
return ', '.join(_formatargtype(t, a) for a in args)
|
|
|
|
|
|
_formatlistfuncs = {
|
|
|
'l': _formatlistexp,
|
|
|
'p': _formatparamexp,
|
|
|
}
|
|
|
|
|
|
def formatspec(expr, *args):
|
|
|
'''
|
|
|
This is a convenience function for using revsets internally, and
|
|
|
escapes arguments appropriately. Aliases are intentionally ignored
|
|
|
so that intended expression behavior isn't accidentally subverted.
|
|
|
|
|
|
Supported arguments:
|
|
|
|
|
|
%r = revset expression, parenthesized
|
|
|
%d = rev(int(arg)), no quoting
|
|
|
%s = string(arg), escaped and single-quoted
|
|
|
%b = arg.branch(), escaped and single-quoted
|
|
|
%n = hex(arg), single-quoted
|
|
|
%% = a literal '%'
|
|
|
|
|
|
Prefixing the type with 'l' specifies a parenthesized list of that type,
|
|
|
and 'p' specifies a list of function parameters of that type.
|
|
|
|
|
|
>>> formatspec(b'%r:: and %lr', b'10 or 11', (b"this()", b"that()"))
|
|
|
'(10 or 11):: and ((this()) or (that()))'
|
|
|
>>> formatspec(b'%d:: and not %d::', 10, 20)
|
|
|
'_rev(10):: and not _rev(20)::'
|
|
|
>>> formatspec(b'%ld or %ld', [], [1])
|
|
|
"_list('') or _rev(1)"
|
|
|
>>> formatspec(b'keyword(%s)', b'foo\\xe9')
|
|
|
"keyword('foo\\\\xe9')"
|
|
|
>>> b = lambda: b'default'
|
|
|
>>> b.branch = b
|
|
|
>>> formatspec(b'branch(%b)', b)
|
|
|
"branch('default')"
|
|
|
>>> formatspec(b'root(%ls)', [b'a', b'b', b'c', b'd'])
|
|
|
"root(_list('a\\\\x00b\\\\x00c\\\\x00d'))"
|
|
|
>>> formatspec(b'sort(%r, %ps)', b':', [b'desc', b'user'])
|
|
|
"sort((:), 'desc', 'user')"
|
|
|
>>> formatspec(b'%ls', [b'a', b"'"])
|
|
|
"_list('a\\\\x00\\\\'')"
|
|
|
'''
|
|
|
parsed = _parseargs(expr, args)
|
|
|
ret = []
|
|
|
for t, arg in parsed:
|
|
|
if t is None:
|
|
|
ret.append(arg)
|
|
|
elif t == 'baseset':
|
|
|
if isinstance(arg, set):
|
|
|
arg = sorted(arg)
|
|
|
ret.append(_formatintlist(list(arg)))
|
|
|
else:
|
|
|
raise error.ProgrammingError("unknown revspec item type: %r" % t)
|
|
|
return b''.join(ret)
|
|
|
|
|
|
def spectree(expr, *args):
|
|
|
"""similar to formatspec but return a parsed and optimized tree"""
|
|
|
parsed = _parseargs(expr, args)
|
|
|
ret = []
|
|
|
inputs = []
|
|
|
for t, arg in parsed:
|
|
|
if t is None:
|
|
|
ret.append(arg)
|
|
|
elif t == 'baseset':
|
|
|
newtree = ('smartset', smartset.baseset(arg))
|
|
|
inputs.append(newtree)
|
|
|
ret.append("$")
|
|
|
else:
|
|
|
raise error.ProgrammingError("unknown revspec item type: %r" % t)
|
|
|
expr = b''.join(ret)
|
|
|
tree = _parsewith(expr, syminitletters=_aliassyminitletters)
|
|
|
tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
|
|
|
tree = foldconcat(tree)
|
|
|
tree = analyze(tree)
|
|
|
tree = optimize(tree)
|
|
|
return tree
|
|
|
|
|
|
def _parseargs(expr, args):
|
|
|
"""parse the expression and replace all inexpensive args
|
|
|
|
|
|
return a list of tuple [(arg-type, arg-value)]
|
|
|
|
|
|
Arg-type can be:
|
|
|
* None: a string ready to be concatenated into a final spec
|
|
|
* 'baseset': an iterable of revisions
|
|
|
"""
|
|
|
expr = pycompat.bytestr(expr)
|
|
|
argiter = iter(args)
|
|
|
ret = []
|
|
|
pos = 0
|
|
|
while pos < len(expr):
|
|
|
q = expr.find('%', pos)
|
|
|
if q < 0:
|
|
|
ret.append((None, expr[pos:]))
|
|
|
break
|
|
|
ret.append((None, expr[pos:q]))
|
|
|
pos = q + 1
|
|
|
try:
|
|
|
d = expr[pos]
|
|
|
except IndexError:
|
|
|
raise error.ParseError(_('incomplete revspec format character'))
|
|
|
if d == '%':
|
|
|
ret.append((None, d))
|
|
|
pos += 1
|
|
|
continue
|
|
|
|
|
|
try:
|
|
|
arg = next(argiter)
|
|
|
except StopIteration:
|
|
|
raise error.ParseError(_('missing argument for revspec'))
|
|
|
f = _formatlistfuncs.get(d)
|
|
|
if f:
|
|
|
# a list of some type, might be expensive, do not replace
|
|
|
pos += 1
|
|
|
islist = (d == 'l')
|
|
|
try:
|
|
|
d = expr[pos]
|
|
|
except IndexError:
|
|
|
raise error.ParseError(_('incomplete revspec format character'))
|
|
|
if islist and d == 'd' and arg:
|
|
|
# we don't create a baseset yet, because it come with an
|
|
|
# extra cost. If we are going to serialize it we better
|
|
|
# skip it.
|
|
|
ret.append(('baseset', arg))
|
|
|
pos += 1
|
|
|
continue
|
|
|
try:
|
|
|
ret.append((None, f(list(arg), d)))
|
|
|
except (TypeError, ValueError):
|
|
|
raise error.ParseError(_('invalid argument for revspec'))
|
|
|
else:
|
|
|
# a single entry, not expensive, replace
|
|
|
try:
|
|
|
ret.append((None, _formatargtype(d, arg)))
|
|
|
except (TypeError, ValueError):
|
|
|
raise error.ParseError(_('invalid argument for revspec'))
|
|
|
pos += 1
|
|
|
|
|
|
try:
|
|
|
next(argiter)
|
|
|
raise error.ParseError(_('too many revspec arguments specified'))
|
|
|
except StopIteration:
|
|
|
pass
|
|
|
return ret
|
|
|
|
|
|
def prettyformat(tree):
|
|
|
return parser.prettyformat(tree, ('string', 'symbol'))
|
|
|
|
|
|
def depth(tree):
|
|
|
if isinstance(tree, tuple):
|
|
|
return max(map(depth, tree)) + 1
|
|
|
else:
|
|
|
return 0
|
|
|
|
|
|
def funcsused(tree):
|
|
|
if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
|
|
|
return set()
|
|
|
else:
|
|
|
funcs = set()
|
|
|
for s in tree[1:]:
|
|
|
funcs |= funcsused(s)
|
|
|
if tree[0] == 'func':
|
|
|
funcs.add(tree[1][1])
|
|
|
return funcs
|
|
|
|
|
|
_hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
|
|
|
|
|
|
def _ishashlikesymbol(symbol):
|
|
|
"""returns true if the symbol looks like a hash"""
|
|
|
return _hashre.match(symbol)
|
|
|
|
|
|
def gethashlikesymbols(tree):
|
|
|
"""returns the list of symbols of the tree that look like hashes
|
|
|
|
|
|
>>> gethashlikesymbols(parse(b'3::abe3ff'))
|
|
|
['3', 'abe3ff']
|
|
|
>>> gethashlikesymbols(parse(b'precursors(.)'))
|
|
|
[]
|
|
|
>>> gethashlikesymbols(parse(b'precursors(34)'))
|
|
|
['34']
|
|
|
>>> gethashlikesymbols(parse(b'abe3ffZ'))
|
|
|
[]
|
|
|
"""
|
|
|
if not tree:
|
|
|
return []
|
|
|
|
|
|
if tree[0] == "symbol":
|
|
|
if _ishashlikesymbol(tree[1]):
|
|
|
return [tree[1]]
|
|
|
elif len(tree) >= 3:
|
|
|
results = []
|
|
|
for subtree in tree[1:]:
|
|
|
results += gethashlikesymbols(subtree)
|
|
|
return results
|
|
|
return []
|
|
|
|