parser.py
216 lines
| 7.6 KiB
| text/x-python
|
PythonLexer
/ mercurial / parser.py
Matt Mackall
|
r11274 | # parser.py - simple top-down operator precedence parser for mercurial | ||
# | ||||
# Copyright 2010 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Julian Cowley
|
r11449 | # see http://effbot.org/zone/simple-top-down-parsing.htm and | ||
Matt Mackall
|
r11274 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ | ||
# for background | ||||
# takes a tokenizer and elements | ||||
Yuya Nishihara
|
r25655 | # tokenizer is an iterator that returns (type, value, pos) tuples | ||
# elements is a mapping of types to binding strength, prefix, infix and | ||||
# optional suffix actions | ||||
Matt Mackall
|
r11274 | # an action is a tree node name, a tree label, and an optional match | ||
timeless@mozdev.org
|
r17500 | # __call__(program) parses program into a labeled tree | ||
Matt Mackall
|
r11274 | |||
Matt Mackall
|
r11289 | import error | ||
Mads Kiilerich
|
r14701 | from i18n import _ | ||
Matt Mackall
|
r11289 | |||
Matt Mackall
|
r11274 | class parser(object): | ||
Yuya Nishihara
|
r25654 | def __init__(self, elements, methods=None): | ||
Matt Mackall
|
r11274 | self._elements = elements | ||
self._methods = methods | ||||
Matt Mackall
|
r13176 | self.current = None | ||
Matt Mackall
|
r11274 | def _advance(self): | ||
'advance the tokenizer' | ||||
t = self.current | ||||
Pierre-Yves David
|
r25171 | self.current = next(self._iter, None) | ||
Matt Mackall
|
r11274 | return t | ||
Peter Arrenbrecht
|
r11319 | def _match(self, m, pos): | ||
Matt Mackall
|
r11274 | 'make sure the tokenizer matches an end condition' | ||
if self.current[0] != m: | ||||
Mads Kiilerich
|
r14701 | raise error.ParseError(_("unexpected token: %s") % self.current[0], | ||
Dirkjan Ochtman
|
r11305 | self.current[2]) | ||
Matt Mackall
|
r11274 | self._advance() | ||
def _parse(self, bind=0): | ||||
Matt Mackall
|
r11289 | token, value, pos = self._advance() | ||
Matt Mackall
|
r11274 | # handle prefix rules on current token | ||
prefix = self._elements[token][1] | ||||
if not prefix: | ||||
Mads Kiilerich
|
r14701 | raise error.ParseError(_("not a prefix: %s") % token, pos) | ||
Matt Mackall
|
r11274 | if len(prefix) == 1: | ||
expr = (prefix[0], value) | ||||
else: | ||||
if len(prefix) > 2 and prefix[2] == self.current[0]: | ||||
Peter Arrenbrecht
|
r11319 | self._match(prefix[2], pos) | ||
Matt Mackall
|
r11274 | expr = (prefix[0], None) | ||
else: | ||||
expr = (prefix[0], self._parse(prefix[1])) | ||||
if len(prefix) > 2: | ||||
Peter Arrenbrecht
|
r11319 | self._match(prefix[2], pos) | ||
Matt Mackall
|
r11274 | # gather tokens until we meet a lower binding strength | ||
while bind < self._elements[self.current[0]][0]: | ||||
Matt Mackall
|
r11289 | token, value, pos = self._advance() | ||
Matt Mackall
|
r11278 | e = self._elements[token] | ||
# check for suffix - next token isn't a valid prefix | ||||
if len(e) == 4 and not self._elements[self.current[0]][1]: | ||||
suffix = e[3] | ||||
expr = (suffix[0], expr) | ||||
Matt Mackall
|
r11274 | else: | ||
Matt Mackall
|
r11278 | # handle infix rules | ||
Matt Mackall
|
r11412 | if len(e) < 3 or not e[2]: | ||
Mads Kiilerich
|
r14701 | raise error.ParseError(_("not an infix: %s") % token, pos) | ||
Matt Mackall
|
r11412 | infix = e[2] | ||
Matt Mackall
|
r11278 | if len(infix) == 3 and infix[2] == self.current[0]: | ||
Peter Arrenbrecht
|
r11319 | self._match(infix[2], pos) | ||
Matt Mackall
|
r11278 | expr = (infix[0], expr, (None)) | ||
else: | ||||
expr = (infix[0], expr, self._parse(infix[1])) | ||||
if len(infix) == 3: | ||||
Peter Arrenbrecht
|
r11319 | self._match(infix[2], pos) | ||
Matt Mackall
|
r11274 | return expr | ||
Yuya Nishihara
|
r25654 | def parse(self, tokeniter): | ||
'generate a parse tree from tokens' | ||||
self._iter = tokeniter | ||||
Matt Mackall
|
r13176 | self._advance() | ||
Bernhard Leiner
|
r13665 | res = self._parse() | ||
token, value, pos = self.current | ||||
return res, pos | ||||
Matt Mackall
|
r11274 | def eval(self, tree): | ||
'recursively evaluate a parse tree using node methods' | ||||
if not isinstance(tree, tuple): | ||||
return tree | ||||
return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) | ||||
Yuya Nishihara
|
r25654 | def __call__(self, tokeniter): | ||
'parse tokens into a parse tree and evaluate if methods given' | ||||
t = self.parse(tokeniter) | ||||
Matt Mackall
|
r11274 | if self._methods: | ||
return self.eval(t) | ||||
return t | ||||
Yuya Nishihara
|
r25253 | |||
Yuya Nishihara
|
r25705 | def buildargsdict(trees, funcname, keys, keyvaluenode, keynode): | ||
"""Build dict from list containing positional and keyword arguments | ||||
Invalid keywords or too many positional arguments are rejected, but | ||||
missing arguments are just omitted. | ||||
""" | ||||
if len(trees) > len(keys): | ||||
raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments") | ||||
% {'func': funcname, 'nargs': len(keys)}) | ||||
args = {} | ||||
# consume positional arguments | ||||
for k, x in zip(keys, trees): | ||||
if x[0] == keyvaluenode: | ||||
break | ||||
args[k] = x | ||||
# remainder should be keyword arguments | ||||
for x in trees[len(args):]: | ||||
if x[0] != keyvaluenode or x[1][0] != keynode: | ||||
raise error.ParseError(_("%(func)s got an invalid argument") | ||||
% {'func': funcname}) | ||||
k = x[1][1] | ||||
if k not in keys: | ||||
raise error.ParseError(_("%(func)s got an unexpected keyword " | ||||
"argument '%(key)s'") | ||||
% {'func': funcname, 'key': k}) | ||||
if k in args: | ||||
raise error.ParseError(_("%(func)s got multiple values for keyword " | ||||
"argument '%(key)s'") | ||||
% {'func': funcname, 'key': k}) | ||||
args[k] = x[2] | ||||
return args | ||||
Yuya Nishihara
|
r25254 | def _prettyformat(tree, leafnodes, level, lines): | ||
if not isinstance(tree, tuple) or tree[0] in leafnodes: | ||||
lines.append((level, str(tree))) | ||||
else: | ||||
lines.append((level, '(%s' % tree[0])) | ||||
for s in tree[1:]: | ||||
_prettyformat(s, leafnodes, level + 1, lines) | ||||
lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] | ||||
Yuya Nishihara
|
r25253 | def prettyformat(tree, leafnodes): | ||
lines = [] | ||||
Yuya Nishihara
|
r25254 | _prettyformat(tree, leafnodes, 0, lines) | ||
Yuya Nishihara
|
r25253 | output = '\n'.join((' ' * l + s) for l, s in lines) | ||
return output | ||||
Yuya Nishihara
|
r25306 | |||
def simplifyinfixops(tree, targetnodes): | ||||
"""Flatten chained infix operations to reduce usage of Python stack | ||||
>>> def f(tree): | ||||
... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',)) | ||||
>>> f(('or', | ||||
... ('or', | ||||
... ('symbol', '1'), | ||||
... ('symbol', '2')), | ||||
... ('symbol', '3'))) | ||||
(or | ||||
('symbol', '1') | ||||
('symbol', '2') | ||||
('symbol', '3')) | ||||
>>> f(('func', | ||||
... ('symbol', 'p1'), | ||||
... ('or', | ||||
... ('or', | ||||
... ('func', | ||||
... ('symbol', 'sort'), | ||||
... ('list', | ||||
... ('or', | ||||
... ('or', | ||||
... ('symbol', '1'), | ||||
... ('symbol', '2')), | ||||
... ('symbol', '3')), | ||||
... ('negate', | ||||
... ('symbol', 'rev')))), | ||||
... ('and', | ||||
... ('symbol', '4'), | ||||
... ('group', | ||||
... ('or', | ||||
... ('or', | ||||
... ('symbol', '5'), | ||||
... ('symbol', '6')), | ||||
... ('symbol', '7'))))), | ||||
... ('symbol', '8')))) | ||||
(func | ||||
('symbol', 'p1') | ||||
(or | ||||
(func | ||||
('symbol', 'sort') | ||||
(list | ||||
(or | ||||
('symbol', '1') | ||||
('symbol', '2') | ||||
('symbol', '3')) | ||||
(negate | ||||
('symbol', 'rev')))) | ||||
(and | ||||
('symbol', '4') | ||||
(group | ||||
(or | ||||
('symbol', '5') | ||||
('symbol', '6') | ||||
('symbol', '7')))) | ||||
('symbol', '8'))) | ||||
""" | ||||
if not isinstance(tree, tuple): | ||||
return tree | ||||
op = tree[0] | ||||
if op not in targetnodes: | ||||
return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:]) | ||||
# walk down left nodes taking each right node. no recursion to left nodes | ||||
# because infix operators are left-associative, i.e. left tree is deep. | ||||
# e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3) | ||||
simplified = [] | ||||
x = tree | ||||
while x[0] == op: | ||||
l, r = x[1:] | ||||
simplified.append(simplifyinfixops(r, targetnodes)) | ||||
x = l | ||||
simplified.append(simplifyinfixops(x, targetnodes)) | ||||
simplified.append(op) | ||||
return tuple(reversed(simplified)) | ||||