parser.py
98 lines
| 3.8 KiB
| text/x-python
|
PythonLexer
/ mercurial / parser.py
Matt Mackall
|
r11274 | # parser.py - simple top-down operator precedence parser for mercurial | ||
# | ||||
# Copyright 2010 Matt Mackall <mpm@selenic.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
Julian Cowley
|
r11449 | # see http://effbot.org/zone/simple-top-down-parsing.htm and | ||
Matt Mackall
|
r11274 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ | ||
# for background | ||||
# takes a tokenizer and elements | ||||
# tokenizer is an iterator that returns type, value pairs | ||||
# elements is a mapping of types to binding strength, prefix and infix actions | ||||
# an action is a tree node name, a tree label, and an optional match | ||||
timeless@mozdev.org
|
r17500 | # __call__(program) parses program into a labeled tree | ||
Matt Mackall
|
r11274 | |||
Matt Mackall
|
r11289 | import error | ||
Mads Kiilerich
|
r14701 | from i18n import _ | ||
Matt Mackall
|
r11289 | |||
Matt Mackall
|
class parser(object):
    """Simple top-down operator precedence parser.

    tokenizer is called with the message (plus the lookup argument when one
    is passed to parse()) and must return an iterator of
    (type, value, position) tuples.

    elements maps every token type to a sequence of
    (binding strength, prefix action, infix action[, suffix action]).
    An action is a tuple whose first item is the tree node name; a prefix
    or infix action may continue with the binding strength used to parse
    its operand and with the token type that must close that operand.

    methods, when given, maps tree node names to the callables that eval()
    applies to the evaluated children of each node.
    """

    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
        # token currently being looked at; None until parse() is called
        self.current = None

    def _advance(self):
        'advance the tokenizer and return the token that was current'
        previous = self.current
        try:
            # builtin next() works with both Python 2.6+ and Python 3
            # iterators, unlike the Python 2 only .next() method
            self.current = next(self._iter)
        except StopIteration:
            # tokenizer exhausted: the last token stays current
            pass
        return previous

    def _match(self, m, pos):
        """make sure the tokenizer matches an end condition

        Raises error.ParseError, located at the current token, when the
        current token type is not m; otherwise consumes the token.
        pos is not used here; it is kept for the existing callers.
        """
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()

    def _parse(self, bind=0):
        """parse one expression and return it as a nested tuple

        Tokens are consumed for as long as the upcoming token binds more
        tightly than bind. Raises error.ParseError when a token is used
        where it has no prefix or infix action.
        """
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        if len(prefix) == 1:
            # plain leaf: node name plus the token's own value
            expr = (prefix[0], value)
        else:
            if len(prefix) > 2 and prefix[2] == self.current[0]:
                # closing token follows immediately: empty operand
                self._match(prefix[2], pos)
                expr = (prefix[0], None)
            else:
                expr = (prefix[0], self._parse(prefix[1]))
                if len(prefix) > 2:
                    self._match(prefix[2], pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
            else:
                # handle infix rules
                if len(e) < 3 or not e[2]:
                    raise error.ParseError(_("not an infix: %s") % token, pos)
                infix = e[2]
                if len(infix) == 3 and infix[2] == self.current[0]:
                    # closing token follows immediately: empty right side
                    self._match(infix[2], pos)
                    expr = (infix[0], expr, None)
                else:
                    expr = (infix[0], expr, self._parse(infix[1]))
                    if len(infix) == 3:
                        self._match(infix[2], pos)
        return expr

    def parse(self, message, lookup=None):
        """generate a parse tree from a message

        lookup, when true, is forwarded to the tokenizer as a second
        argument. Returns (tree, pos) where pos is the position of the
        token at which parsing stopped.
        """
        if lookup:
            self._iter = self._tokenizer(message, lookup)
        else:
            self._iter = self._tokenizer(message)
        # prime self.current with the first token
        self._advance()
        res = self._parse()
        token, value, pos = self.current
        return res, pos

    def eval(self, tree):
        """recursively evaluate a parse tree using node methods

        Anything that is not a tuple is returned unchanged; a tuple is a
        node whose first item selects the method and whose remaining items
        are evaluated and passed to it as arguments.
        """
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])

    def __call__(self, message):
        """parse a message into a parse tree and evaluate if methods given

        Returns the evaluated result when methods were supplied, otherwise
        the (tree, pos) pair produced by parse().
        """
        tree, pos = self.parse(message)
        if self._methods:
            # evaluate the tree alone: parse() also returns the stop
            # position, which is not a tree node
            return self.eval(tree)
        return tree, pos