parser.py
187 lines
| 6.4 KiB
| text/x-python
|
PythonLexer
/ mercurial / parser.py
Matt Mackall
|
r11274 | # parser.py - simple top-down operator precedence parser for mercurial | ||
# | ||||
# Copyright 2010 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Julian Cowley
|
r11449 | # see http://effbot.org/zone/simple-top-down-parsing.htm and | ||
Matt Mackall
|
r11274 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ | ||
# for background | ||||
# takes a tokenizer and elements | ||||
# tokenizer is an iterator that returns type, value pairs | ||||
# elements is a mapping of types to binding strength, prefix and infix actions | ||||
# an action is a tree node name, a tree label, and an optional match | ||||
timeless@mozdev.org
|
r17500 | # __call__(program) parses program into a labeled tree | ||
Matt Mackall
|
r11274 | |||
Matt Mackall
|
r11289 | import error | ||
Mads Kiilerich
|
r14701 | from i18n import _ | ||
Matt Mackall
|
r11289 | |||
Matt Mackall
|
class parser(object):
    """Simple top-down operator precedence parser.

    ``tokenizer`` is a callable that, given the message to parse (and
    optionally a ``lookup`` argument), returns an iterator of
    ``(type, value, pos)`` tokens.

    ``elements`` maps each token type to a tuple
    ``(binding strength, prefix action, infix action[, suffix action])``.
    An action is a tuple whose first item is the tree node name; it may be
    followed by the binding strength used to parse the operand and by the
    token type that must close the construct.

    ``methods``, when given, maps tree node names to the callables used by
    eval().
    """

    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
        # token currently being looked at; set by parse() via _advance()
        self.current = None

    def _advance(self):
        """Return the current token and load the next one.

        ``self.current`` becomes None once the token iterator is exhausted.
        """
        t = self.current
        self.current = next(self._iter, None)
        return t

    def _match(self, m, pos):
        """Consume the current token, which must be of type ``m``.

        Raises error.ParseError, located at the offending token, if the
        current token has a different type. ``pos`` is kept for interface
        compatibility and is not used.
        """
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()

    def _parse(self, bind=0):
        """Parse one expression and return it as a tuple tree.

        Tokens keep being folded into the expression for as long as the
        binding strength of the upcoming token is greater than ``bind``.

        Raises error.ParseError if a token is used where its element entry
        has no prefix (respectively infix) action.
        """
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        if len(prefix) == 1:
            # leaf node: keep the token value itself
            expr = (prefix[0], value)
        elif len(prefix) > 2 and prefix[2] == self.current[0]:
            # the closing token follows immediately: empty operand
            self._match(prefix[2], pos)
            expr = (prefix[0], None)
        else:
            expr = (prefix[0], self._parse(prefix[1]))
            if len(prefix) > 2:
                self._match(prefix[2], pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
                continue
            # handle infix rules
            if len(e) < 3 or not e[2]:
                raise error.ParseError(_("not an infix: %s") % token, pos)
            infix = e[2]
            if len(infix) == 3 and infix[2] == self.current[0]:
                # the closing token follows immediately: empty right operand
                self._match(infix[2], pos)
                expr = (infix[0], expr, None)
            else:
                expr = (infix[0], expr, self._parse(infix[1]))
                if len(infix) == 3:
                    self._match(infix[2], pos)
        return expr

    def parse(self, message, lookup=None):
        """Generate a parse tree from a message.

        ``lookup`` is forwarded to the tokenizer only when it is truthy.

        Returns a ``(tree, pos)`` pair where ``pos`` is the position of the
        token that follows the parsed expression.
        """
        if lookup:
            self._iter = self._tokenizer(message, lookup)
        else:
            self._iter = self._tokenizer(message)
        # prime self.current with the first token
        self._advance()
        res = self._parse()
        token, value, pos = self.current
        return res, pos

    def eval(self, tree):
        """Recursively evaluate a parse tree using node methods.

        A non-tuple value is returned unchanged. For a tuple node, the
        method registered under its first item is called with the evaluated
        remaining items as positional arguments.
        """
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])

    def __call__(self, message):
        """Parse a message and evaluate the tree if methods were given.

        With methods, returns the result of evaluating the parse tree.
        Without methods, returns the ``(tree, pos)`` pair produced by
        parse() unchanged.
        """
        parsed = self.parse(message)
        if self._methods:
            # parse() returns (tree, pos); only the tree part is a node that
            # eval() can dispatch on, so the position must not be passed in.
            return self.eval(parsed[0])
        return parsed
Yuya Nishihara
|
r25253 | |||
Yuya Nishihara
|
r25254 | def _prettyformat(tree, leafnodes, level, lines): | ||
if not isinstance(tree, tuple) or tree[0] in leafnodes: | ||||
lines.append((level, str(tree))) | ||||
else: | ||||
lines.append((level, '(%s' % tree[0])) | ||||
for s in tree[1:]: | ||||
_prettyformat(s, leafnodes, level + 1, lines) | ||||
lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] | ||||
Yuya Nishihara
|
def prettyformat(tree, leafnodes):
    """Return ``tree`` rendered as an indented, multi-line string.

    Nodes whose name is listed in ``leafnodes`` are printed as plain tuples
    on a single line; other nodes are expanded with one line per child.
    """
    entries = []
    _prettyformat(tree, leafnodes, 0, entries)
    # NOTE(review): the indent unit is reproduced exactly as it appears in
    # the reviewed listing, which may have collapsed whitespace - confirm
    # its width against the repository copy.
    return '\n'.join([' ' * depth + text for depth, text in entries])
Yuya Nishihara
|
r25306 | |||
def simplifyinfixops(tree, targetnodes):
    """Flatten chained infix operations to reduce usage of Python stack

    ``tree`` is a tuple tree whose first item is the node name.
    ``targetnodes`` lists the names of the left-associative binary
    operators to flatten. Returns a new tree in which every chain such as
    ``(op (op a b) c)`` is replaced by ``(op a b c)``. Non-tuple values are
    returned unchanged, wherever they appear in the tree.

    >>> simplifyinfixops(
    ...     ('or',
    ...       ('or',
    ...         ('symbol', '1'),
    ...         ('symbol', '2')),
    ...       ('symbol', '3')),
    ...     ('or',))
    ('or', ('symbol', '1'), ('symbol', '2'), ('symbol', '3'))

    >>> tree = ('func',
    ...   ('symbol', 'p1'),
    ...   ('or',
    ...     ('or',
    ...       ('func',
    ...         ('symbol', 'sort'),
    ...         ('list',
    ...           ('or',
    ...             ('or',
    ...               ('symbol', '1'),
    ...               ('symbol', '2')),
    ...             ('symbol', '3')),
    ...           ('negate',
    ...             ('symbol', 'rev')))),
    ...       ('and',
    ...         ('symbol', '4'),
    ...         ('group',
    ...           ('or',
    ...             ('or',
    ...               ('symbol', '5'),
    ...               ('symbol', '6')),
    ...             ('symbol', '7'))))),
    ...     ('symbol', '8')))
    >>> simplifyinfixops(tree, ('or',)) == ('func',
    ...   ('symbol', 'p1'),
    ...   ('or',
    ...     ('func',
    ...       ('symbol', 'sort'),
    ...       ('list',
    ...         ('or',
    ...           ('symbol', '1'),
    ...           ('symbol', '2'),
    ...           ('symbol', '3')),
    ...         ('negate',
    ...           ('symbol', 'rev')))),
    ...     ('and',
    ...       ('symbol', '4'),
    ...       ('group',
    ...         ('or',
    ...           ('symbol', '5'),
    ...           ('symbol', '6'),
    ...           ('symbol', '7')))),
    ...     ('symbol', '8')))
    True

    A left operand that is not a tuple ends the chain like any other
    operand:

    >>> simplifyinfixops(('or', ('or', None, 2), 3), ('or',))
    ('or', None, 2, 3)
    """
    if not isinstance(tree, tuple):
        return tree
    op = tree[0]
    if op not in targetnodes:
        return (op,) + tuple(simplifyinfixops(x, targetnodes)
                             for x in tree[1:])

    # walk down left nodes taking each right node. no recursion to left nodes
    # because infix operators are left-associative, i.e. left tree is deep.
    # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
    simplified = []
    x = tree
    # the isinstance() check stops the walk at a non-tuple left operand
    # instead of indexing into it
    while isinstance(x, tuple) and x[0] == op:
        l, r = x[1:]
        simplified.append(simplifyinfixops(r, targetnodes))
        x = l
    simplified.append(simplifyinfixops(x, targetnodes))
    simplified.append(op)
    # operands were collected right-to-left, with the operator name last
    return tuple(reversed(simplified))