filesetlang.py
351 lines
| 10.3 KiB
| text/x-python
|
PythonLexer
/ mercurial / filesetlang.py
Yuya Nishihara
|
r38841 | # filesetlang.py - parser, tokenizer and utility for file set language | ||
# | ||||
Raphaël Gomès
|
r47575 | # Copyright 2010 Olivia Mackall <olivia@selenic.com> | ||
Yuya Nishihara
|
r38841 | # | ||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Matt Harbison
|
r52755 | from __future__ import annotations | ||
Yuya Nishihara
|
r38841 | |||
from .i18n import _ | ||||
from . import ( | ||||
error, | ||||
parser, | ||||
pycompat, | ||||
) | ||||
Yuya Nishihara
|
r38899 | # common weight constants for static optimization | ||
# (see registrar.filesetpredicate for details) | ||||
WEIGHT_CHECK_FILENAME = 0.5 | ||||
WEIGHT_READ_CONTENTS = 30 | ||||
WEIGHT_STATUS = 10 | ||||
WEIGHT_STATUS_THOROUGH = 50 | ||||
Yuya Nishihara
|
r38841 | elements = { | ||
# token-type: binding-strength, primary, prefix, infix, suffix | ||||
Augie Fackler
|
r43347 | b"(": (20, None, (b"group", 1, b")"), (b"func", 1, b")"), None), | ||
b":": (15, None, None, (b"kindpat", 15), None), | ||||
b"-": (5, None, (b"negate", 19), (b"minus", 5), None), | ||||
b"not": (10, None, (b"not", 10), None, None), | ||||
b"!": (10, None, (b"not", 10), None, None), | ||||
b"and": (5, None, None, (b"and", 5), None), | ||||
b"&": (5, None, None, (b"and", 5), None), | ||||
b"or": (4, None, None, (b"or", 4), None), | ||||
b"|": (4, None, None, (b"or", 4), None), | ||||
b"+": (4, None, None, (b"or", 4), None), | ||||
b",": (2, None, None, (b"list", 2), None), | ||||
b")": (0, None, None, None, None), | ||||
b"symbol": (0, b"symbol", None, None, None), | ||||
b"string": (0, b"string", None, None, None), | ||||
b"end": (0, None, None, None, None), | ||||
Yuya Nishihara
|
r38841 | } | ||
Augie Fackler
|
r43347 | keywords = {b'and', b'or', b'not'} | ||
Yuya Nishihara
|
r38841 | |||
symbols = {} | ||||
Augie Fackler
|
r43347 | globchars = b".*{}[]?/\\_" | ||
Yuya Nishihara
|
r38841 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def tokenize(program): | ||
pos, l = 0, len(program) | ||||
program = pycompat.bytestr(program) | ||||
while pos < l: | ||||
c = program[pos] | ||||
Augie Fackler
|
r43346 | if c.isspace(): # skip inter-token whitespace | ||
Yuya Nishihara
|
r38841 | pass | ||
Augie Fackler
|
r43347 | elif c in b"(),-:|&+!": # handle simple operators | ||
Yuya Nishihara
|
r38841 | yield (c, None, pos) | ||
Augie Fackler
|
r43346 | elif ( | ||
Augie Fackler
|
r43347 | c in b'"\'' | ||
or c == b'r' | ||||
and program[pos : pos + 2] in (b"r'", b'r"') | ||||
Augie Fackler
|
r43346 | ): # handle quoted strings | ||
Augie Fackler
|
r43347 | if c == b'r': | ||
Yuya Nishihara
|
r38841 | pos += 1 | ||
c = program[pos] | ||||
decode = lambda x: x | ||||
else: | ||||
decode = parser.unescapestr | ||||
pos += 1 | ||||
s = pos | ||||
Augie Fackler
|
r43346 | while pos < l: # find closing quote | ||
Yuya Nishihara
|
r38841 | d = program[pos] | ||
Augie Fackler
|
r43347 | if d == b'\\': # skip over escaped characters | ||
Yuya Nishihara
|
r38841 | pos += 2 | ||
continue | ||||
if d == c: | ||||
Augie Fackler
|
r43347 | yield (b'string', decode(program[s:pos]), s) | ||
Yuya Nishihara
|
r38841 | break | ||
pos += 1 | ||||
else: | ||||
Augie Fackler
|
r43347 | raise error.ParseError(_(b"unterminated string"), s) | ||
Yuya Nishihara
|
r38841 | elif c.isalnum() or c in globchars or ord(c) > 127: | ||
# gather up a symbol/keyword | ||||
s = pos | ||||
pos += 1 | ||||
Augie Fackler
|
r43346 | while pos < l: # find end of symbol | ||
Yuya Nishihara
|
r38841 | d = program[pos] | ||
if not (d.isalnum() or d in globchars or ord(d) > 127): | ||||
break | ||||
pos += 1 | ||||
sym = program[s:pos] | ||||
Augie Fackler
|
r43346 | if sym in keywords: # operator keywords | ||
Yuya Nishihara
|
r38841 | yield (sym, None, s) | ||
else: | ||||
Augie Fackler
|
r43347 | yield (b'symbol', sym, s) | ||
Yuya Nishihara
|
r38841 | pos -= 1 | ||
else: | ||||
Augie Fackler
|
r43347 | raise error.ParseError(_(b"syntax error"), pos) | ||
Yuya Nishihara
|
r38841 | pos += 1 | ||
Augie Fackler
|
r43347 | yield (b'end', None, pos) | ||
Yuya Nishihara
|
r38841 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def parse(expr): | ||
p = parser.parser(elements) | ||||
tree, pos = p.parse(tokenize(expr)) | ||||
if pos != len(expr): | ||||
Augie Fackler
|
r43347 | raise error.ParseError(_(b"invalid token"), pos) | ||
return parser.simplifyinfixops(tree, {b'list', b'or'}) | ||||
Yuya Nishihara
|
r38841 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def getsymbol(x): | ||
Augie Fackler
|
r43347 | if x and x[0] == b'symbol': | ||
Yuya Nishihara
|
r38841 | return x[1] | ||
Augie Fackler
|
r43347 | raise error.ParseError(_(b'not a symbol')) | ||
Yuya Nishihara
|
r38841 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def getstring(x, err): | ||
Augie Fackler
|
r43347 | if x and (x[0] == b'string' or x[0] == b'symbol'): | ||
Yuya Nishihara
|
r38841 | return x[1] | ||
raise error.ParseError(err) | ||||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def getkindpat(x, y, allkinds, err): | ||
kind = getsymbol(x) | ||||
pat = getstring(y, err) | ||||
if kind not in allkinds: | ||||
Augie Fackler
|
r43347 | raise error.ParseError(_(b"invalid pattern kind: %s") % kind) | ||
return b'%s:%s' % (kind, pat) | ||||
Yuya Nishihara
|
r38841 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def getpattern(x, allkinds, err): | ||
Augie Fackler
|
r43347 | if x and x[0] == b'kindpat': | ||
Yuya Nishihara
|
r38841 | return getkindpat(x[1], x[2], allkinds, err) | ||
return getstring(x, err) | ||||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def getlist(x): | ||
if not x: | ||||
return [] | ||||
Augie Fackler
|
r43347 | if x[0] == b'list': | ||
Yuya Nishihara
|
r38841 | return list(x[1:]) | ||
return [x] | ||||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def getargs(x, min, max, err): | ||
l = getlist(x) | ||||
if len(l) < min or len(l) > max: | ||||
raise error.ParseError(err) | ||||
return l | ||||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38862 | def _analyze(x): | ||
if x is None: | ||||
return x | ||||
op = x[0] | ||||
Augie Fackler
|
r43347 | if op in {b'string', b'symbol'}: | ||
Yuya Nishihara
|
r38862 | return x | ||
Augie Fackler
|
r43347 | if op == b'kindpat': | ||
Yuya Nishihara
|
r38862 | getsymbol(x[1]) # kind must be a symbol | ||
t = _analyze(x[2]) | ||||
return (op, x[1], t) | ||||
Augie Fackler
|
r43347 | if op == b'group': | ||
Yuya Nishihara
|
r38863 | return _analyze(x[1]) | ||
Augie Fackler
|
r43347 | if op == b'negate': | ||
raise error.ParseError(_(b"can't use negate operator in this context")) | ||||
if op == b'not': | ||||
Yuya Nishihara
|
r38862 | t = _analyze(x[1]) | ||
return (op, t) | ||||
Augie Fackler
|
r43347 | if op == b'and': | ||
Yuya Nishihara
|
r38862 | ta = _analyze(x[1]) | ||
tb = _analyze(x[2]) | ||||
return (op, ta, tb) | ||||
Augie Fackler
|
r43347 | if op == b'minus': | ||
return _analyze((b'and', x[1], (b'not', x[2]))) | ||||
if op in {b'list', b'or'}: | ||||
Yuya Nishihara
|
r38862 | ts = tuple(_analyze(y) for y in x[1:]) | ||
return (op,) + ts | ||||
Augie Fackler
|
r43347 | if op == b'func': | ||
Yuya Nishihara
|
r38862 | getsymbol(x[1]) # function name must be a symbol | ||
ta = _analyze(x[2]) | ||||
return (op, x[1], ta) | ||||
Augie Fackler
|
r43347 | raise error.ProgrammingError(b'invalid operator %r' % op) | ||
Yuya Nishihara
|
r38862 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38915 | def _insertstatushints(x): | ||
"""Insert hint nodes where status should be calculated (first path) | ||||
This works in bottom-up way, summing up status names and inserting hint | ||||
nodes at 'and' and 'or' as needed. Thus redundant hint nodes may be left. | ||||
Returns (status-names, new-tree) at the given subtree, where status-names | ||||
is a sum of status names referenced in the given subtree. | ||||
""" | ||||
if x is None: | ||||
return (), x | ||||
op = x[0] | ||||
Augie Fackler
|
r43347 | if op in {b'string', b'symbol', b'kindpat'}: | ||
Yuya Nishihara
|
r38915 | return (), x | ||
Augie Fackler
|
r43347 | if op == b'not': | ||
Yuya Nishihara
|
r38915 | h, t = _insertstatushints(x[1]) | ||
return h, (op, t) | ||||
Augie Fackler
|
r43347 | if op == b'and': | ||
Yuya Nishihara
|
r38915 | ha, ta = _insertstatushints(x[1]) | ||
hb, tb = _insertstatushints(x[2]) | ||||
hr = ha + hb | ||||
if ha and hb: | ||||
Augie Fackler
|
r43347 | return hr, (b'withstatus', (op, ta, tb), (b'string', b' '.join(hr))) | ||
Yuya Nishihara
|
r38915 | return hr, (op, ta, tb) | ||
Augie Fackler
|
r43347 | if op == b'or': | ||
Yuya Nishihara
|
r38915 | hs, ts = zip(*(_insertstatushints(y) for y in x[1:])) | ||
hr = sum(hs, ()) | ||||
if sum(bool(h) for h in hs) > 1: | ||||
Augie Fackler
|
r43347 | return hr, (b'withstatus', (op,) + ts, (b'string', b' '.join(hr))) | ||
Yuya Nishihara
|
r38915 | return hr, (op,) + ts | ||
Augie Fackler
|
r43347 | if op == b'list': | ||
Yuya Nishihara
|
r38915 | hs, ts = zip(*(_insertstatushints(y) for y in x[1:])) | ||
return sum(hs, ()), (op,) + ts | ||||
Augie Fackler
|
r43347 | if op == b'func': | ||
Yuya Nishihara
|
r38915 | f = getsymbol(x[1]) | ||
# don't propagate 'ha' crossing a function boundary | ||||
ha, ta = _insertstatushints(x[2]) | ||||
if getattr(symbols.get(f), '_callstatus', False): | ||||
Augie Fackler
|
r43347 | return (f,), (b'withstatus', (op, x[1], ta), (b'string', f)) | ||
Yuya Nishihara
|
r38915 | return (), (op, x[1], ta) | ||
Augie Fackler
|
r43347 | raise error.ProgrammingError(b'invalid operator %r' % op) | ||
Yuya Nishihara
|
r38915 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38915 | def _mergestatushints(x, instatus): | ||
"""Remove redundant status hint nodes (second path) | ||||
This is the top-down path to eliminate inner hint nodes. | ||||
""" | ||||
if x is None: | ||||
return x | ||||
op = x[0] | ||||
Augie Fackler
|
r43347 | if op == b'withstatus': | ||
Yuya Nishihara
|
r38915 | if instatus: | ||
# drop redundant hint node | ||||
return _mergestatushints(x[1], instatus) | ||||
t = _mergestatushints(x[1], instatus=True) | ||||
return (op, t, x[2]) | ||||
Augie Fackler
|
r43347 | if op in {b'string', b'symbol', b'kindpat'}: | ||
Yuya Nishihara
|
r38915 | return x | ||
Augie Fackler
|
r43347 | if op == b'not': | ||
Yuya Nishihara
|
r38915 | t = _mergestatushints(x[1], instatus) | ||
return (op, t) | ||||
Augie Fackler
|
r43347 | if op == b'and': | ||
Yuya Nishihara
|
r38915 | ta = _mergestatushints(x[1], instatus) | ||
tb = _mergestatushints(x[2], instatus) | ||||
return (op, ta, tb) | ||||
Augie Fackler
|
r43347 | if op in {b'list', b'or'}: | ||
Yuya Nishihara
|
r38915 | ts = tuple(_mergestatushints(y, instatus) for y in x[1:]) | ||
return (op,) + ts | ||||
Augie Fackler
|
r43347 | if op == b'func': | ||
Yuya Nishihara
|
r38915 | # don't propagate 'instatus' crossing a function boundary | ||
ta = _mergestatushints(x[2], instatus=False) | ||||
return (op, x[1], ta) | ||||
Augie Fackler
|
r43347 | raise error.ProgrammingError(b'invalid operator %r' % op) | ||
Yuya Nishihara
|
r38915 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38862 | def analyze(x): | ||
"""Transform raw parsed tree to evaluatable tree which can be fed to | ||||
Yuya Nishihara
|
r38865 | optimize() or getmatch() | ||
Yuya Nishihara
|
r38862 | |||
All pseudo operations should be mapped to real operations or functions | ||||
defined in methods or symbols table respectively. | ||||
""" | ||||
Yuya Nishihara
|
r38915 | t = _analyze(x) | ||
_h, t = _insertstatushints(t) | ||||
return _mergestatushints(t, instatus=False) | ||||
Yuya Nishihara
|
r38862 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38868 | def _optimizeandops(op, ta, tb): | ||
Augie Fackler
|
r43347 | if tb is not None and tb[0] == b'not': | ||
return (b'minus', ta, tb[1]) | ||||
Yuya Nishihara
|
r38868 | return (op, ta, tb) | ||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38901 | def _optimizeunion(xs): | ||
# collect string patterns so they can be compiled into a single regexp | ||||
ws, ts, ss = [], [], [] | ||||
for x in xs: | ||||
w, t = _optimize(x) | ||||
Augie Fackler
|
r43347 | if t is not None and t[0] in {b'string', b'symbol', b'kindpat'}: | ||
Yuya Nishihara
|
r38901 | ss.append(t) | ||
continue | ||||
ws.append(w) | ||||
ts.append(t) | ||||
if ss: | ||||
ws.append(WEIGHT_CHECK_FILENAME) | ||||
Augie Fackler
|
r43347 | ts.append((b'patterns',) + tuple(ss)) | ||
Yuya Nishihara
|
r38901 | return ws, ts | ||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38865 | def _optimize(x): | ||
if x is None: | ||||
return 0, x | ||||
op = x[0] | ||||
Augie Fackler
|
r43347 | if op == b'withstatus': | ||
Yuya Nishihara
|
r38915 | w, t = _optimize(x[1]) | ||
return w, (op, t, x[2]) | ||||
Augie Fackler
|
r43347 | if op in {b'string', b'symbol'}: | ||
Yuya Nishihara
|
r38899 | return WEIGHT_CHECK_FILENAME, x | ||
Augie Fackler
|
r43347 | if op == b'kindpat': | ||
Yuya Nishihara
|
r38865 | w, t = _optimize(x[2]) | ||
return w, (op, x[1], t) | ||||
Augie Fackler
|
r43347 | if op == b'not': | ||
Yuya Nishihara
|
r38865 | w, t = _optimize(x[1]) | ||
return w, (op, t) | ||||
Augie Fackler
|
r43347 | if op == b'and': | ||
Yuya Nishihara
|
r38867 | wa, ta = _optimize(x[1]) | ||
wb, tb = _optimize(x[2]) | ||||
if wa <= wb: | ||||
Yuya Nishihara
|
r38868 | return wa, _optimizeandops(op, ta, tb) | ||
Yuya Nishihara
|
r38867 | else: | ||
Yuya Nishihara
|
r38868 | return wb, _optimizeandops(op, tb, ta) | ||
Augie Fackler
|
r43347 | if op == b'or': | ||
Yuya Nishihara
|
r38901 | ws, ts = _optimizeunion(x[1:]) | ||
if len(ts) == 1: | ||||
Augie Fackler
|
r43346 | return ws[0], ts[0] # 'or' operation is fully optimized out | ||
ts = tuple( | ||||
it[1] for it in sorted(enumerate(ts), key=lambda it: ws[it[0]]) | ||||
) | ||||
Yuya Nishihara
|
r38865 | return max(ws), (op,) + ts | ||
Augie Fackler
|
r43347 | if op == b'list': | ||
Yuya Nishihara
|
r38865 | ws, ts = zip(*(_optimize(y) for y in x[1:])) | ||
return sum(ws), (op,) + ts | ||||
Augie Fackler
|
r43347 | if op == b'func': | ||
Yuya Nishihara
|
r38865 | f = getsymbol(x[1]) | ||
w = getattr(symbols.get(f), '_weight', 1) | ||||
wa, ta = _optimize(x[2]) | ||||
return w + wa, (op, x[1], ta) | ||||
Augie Fackler
|
r43347 | raise error.ProgrammingError(b'invalid operator %r' % op) | ||
Yuya Nishihara
|
r38865 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38865 | def optimize(x): | ||
"""Reorder/rewrite evaluatable tree for optimization | ||||
All pseudo operations should be transformed beforehand. | ||||
""" | ||||
_w, t = _optimize(x) | ||||
return t | ||||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r38841 | def prettyformat(tree): | ||
Augie Fackler
|
r43347 | return parser.prettyformat(tree, (b'string', b'symbol')) | ||