##// END OF EJS Templates
parser: improve infix error checking...
Matt Mackall -
r11412:51ceb157 default
parent child Browse files
Show More
@@ -1,91 +1,91 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.txt and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns type, value pairs
14 14 # elements is a mapping of types to binding strength, prefix and infix actions
15 15 # an action is a tree node name, a tree label, and an optional match
16 16 # __call__(program) parses program into a labelled tree
17 17
18 18 import error
19 19
20 20 class parser(object):
21 21 def __init__(self, tokenizer, elements, methods=None):
22 22 self._tokenizer = tokenizer
23 23 self._elements = elements
24 24 self._methods = methods
25 25 def _advance(self):
26 26 'advance the tokenizer'
27 27 t = self.current
28 28 try:
29 29 self.current = self._iter.next()
30 30 except StopIteration:
31 31 pass
32 32 return t
33 33 def _match(self, m, pos):
34 34 'make sure the tokenizer matches an end condition'
35 35 if self.current[0] != m:
36 36 raise error.ParseError("unexpected token: %s" % self.current[0],
37 37 self.current[2])
38 38 self._advance()
39 39 def _parse(self, bind=0):
40 40 token, value, pos = self._advance()
41 41 # handle prefix rules on current token
42 42 prefix = self._elements[token][1]
43 43 if not prefix:
44 44 raise error.ParseError("not a prefix: %s" % token, pos)
45 45 if len(prefix) == 1:
46 46 expr = (prefix[0], value)
47 47 else:
48 48 if len(prefix) > 2 and prefix[2] == self.current[0]:
49 49 self._match(prefix[2], pos)
50 50 expr = (prefix[0], None)
51 51 else:
52 52 expr = (prefix[0], self._parse(prefix[1]))
53 53 if len(prefix) > 2:
54 54 self._match(prefix[2], pos)
55 55 # gather tokens until we meet a lower binding strength
56 56 while bind < self._elements[self.current[0]][0]:
57 57 token, value, pos = self._advance()
58 58 e = self._elements[token]
59 59 # check for suffix - next token isn't a valid prefix
60 60 if len(e) == 4 and not self._elements[self.current[0]][1]:
61 61 suffix = e[3]
62 62 expr = (suffix[0], expr)
63 63 else:
64 64 # handle infix rules
65 infix = self._elements[token][2]
65 if len(e) < 3 or not e[2]:
66 raise error.ParseError("not an infix: %s" % token, pos)
67 infix = e[2]
66 68 if len(infix) == 3 and infix[2] == self.current[0]:
67 69 self._match(infix[2], pos)
68 70 expr = (infix[0], expr, (None))
69 71 else:
70 if not infix[0]:
71 raise error.ParseError("not an infix: %s" % token, pos)
72 72 expr = (infix[0], expr, self._parse(infix[1]))
73 73 if len(infix) == 3:
74 74 self._match(infix[2], pos)
75 75 return expr
76 76 def parse(self, message):
77 77 'generate a parse tree from a message'
78 78 self._iter = self._tokenizer(message)
79 79 self.current = self._iter.next()
80 80 return self._parse()
81 81 def eval(self, tree):
82 82 'recursively evaluate a parse tree using node methods'
83 83 if not isinstance(tree, tuple):
84 84 return tree
85 85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
86 86 def __call__(self, message):
87 87 'parse a message into a parse tree and evaluate if methods given'
88 88 t = self.parse(message)
89 89 if self._methods:
90 90 return self.eval(t)
91 91 return t
General Comments 0
You need to be logged in to leave comments. Login now