# HG changeset patch # User Yuya Nishihara # Date 2016-02-17 12:38:25 # Node ID b862e6fca7ac2bbf59837ae63a97a2b9715002e8 # Parent 90896b61fe2653c68062139e8cc06894a96f1764 revsetlang: build optimized tree by helper function This should make optimize() more readable, but it doubles the parsing cost. (original) $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \ 'L.optimize(L.analyze(L.parse("::tip")))' 10000 loops, best of 3: 18.1 usec per loop (this patch) $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \ 'L._treecache.clear(); L.optimize(L.analyze(L.parse("::tip")))' 10000 loops, best of 3: 48.4 usec per loop 30usec isn't dominant compared to the revset evaluation, but that is a cost. That's why a parsed tree is cached, which can benefit in hgweb or chg server. diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py --- a/mercurial/revsetlang.py +++ b/mercurial/revsetlang.py @@ -239,6 +239,25 @@ def getargsdict(x, funcname, keys): return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), keyvaluenode='keyvalue', keynode='symbol') +# cache of {spec: raw parsed tree} built internally +_treecache = {} + +def _cachedtree(spec): + # thread safe because parse() is reentrant and dict.__setitem__() is atomic + tree = _treecache.get(spec) + if tree is None: + _treecache[spec] = tree = parse(spec) + return tree + +def _build(tmplspec, *repls): + """Create raw parsed tree from a template revset statement + + >>> _build('f(_) and _', ('string', '1'), ('symbol', '2')) + ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2')) + """ + template = _cachedtree(tmplspec) + return parser.buildtree(template, ('symbol', '_'), *repls) + def _isnamedfunc(x, funcname): """Check if given tree matches named function""" return x and x[0] == 'func' and getsymbol(x[1]) == funcname @@ -302,16 +321,15 @@ def _analyze(x): op = x[0] if op == 'minus': - return _analyze(('and', x[1], ('not', x[2]))) + return _analyze(_build('_ and not _', *x[1:])) elif op == 'only': - t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) - return _analyze(t) + return _analyze(_build('only(_, _)', *x[1:])) elif op == 'onlypost': - return _analyze(('func', ('symbol', 'only'), x[1])) + return _analyze(_build('only(_)', x[1])) elif op == 'dagrangepre': - return _analyze(('func', ('symbol', 'ancestors'), x[1])) + return _analyze(_build('ancestors(_)', x[1])) elif op == 'dagrangepost': - return _analyze(('func', ('symbol', 'descendants'), x[1])) + return _analyze(_build('descendants(_)', x[1])) elif op == 'negate': s = getstring(x[1], _("can't negate that")) return _analyze(('string', '-' + s)) @@ -367,9 +385,9 @@ def _optimize(x, small): w = min(wa, wb) # (::x and not ::y)/(not ::y and ::x) have a fast path - tm = _matchonly(ta, tb) or _matchonly(tb, ta) - if tm: - return w, ('func', ('symbol', 'only'), tm) + m = _matchonly(ta, tb) or _matchonly(tb, ta) + if m: + return w, _build('only(_, _)', *m[1:]) if tb is not None and tb[0] == 'not': return wa, ('difference', ta, tb[1]) @@ -387,7 +405,7 @@ def _optimize(x, small): w, t = ss[0] else: s = '\0'.join(t[1] for w, t in ss) - y = ('func', ('symbol', '_list'), ('string', s)) + y = _build('_list(_)', ('string', s)) w, t = _optimize(y, False) ws.append(w) ts.append(t) @@ -407,8 +425,7 @@ def _optimize(x, small): elif op == 'not': # Optimize not public() to _notpublic() because we have a fast version if x[1][:3] == ('func', ('symbol', 'public'), None): - newsym = ('func', ('symbol', '_notpublic'), None) - o = _optimize(newsym, not small) + o = _optimize(_build('_notpublic()'), not small) return o[0], o[1] else: o = _optimize(x[1], not small)