##// END OF EJS Templates
parser: improve infix error checking...
Matt Mackall -
r11412:51ceb157 default
parent child Browse files
Show More
@@ -1,91 +1,91 b''
1 # parser.py - simple top-down operator precedence parser for mercurial
1 # parser.py - simple top-down operator precedence parser for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # see http://effbot.org/zone/simple-top-down-parsing.txt and
8 # see http://effbot.org/zone/simple-top-down-parsing.txt and
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 # for background
10 # for background
11
11
12 # takes a tokenizer and elements
12 # takes a tokenizer and elements
13 # tokenizer is an iterator that returns type, value pairs
13 # tokenizer is an iterator that returns type, value pairs
14 # elements is a mapping of types to binding strength, prefix and infix actions
14 # elements is a mapping of types to binding strength, prefix and infix actions
15 # an action is a tree node name, a tree label, and an optional match
15 # an action is a tree node name, a tree label, and an optional match
16 # __call__(program) parses program into a labelled tree
16 # __call__(program) parses program into a labelled tree
17
17
18 import error
18 import error
19
19
20 class parser(object):
20 class parser(object):
21 def __init__(self, tokenizer, elements, methods=None):
21 def __init__(self, tokenizer, elements, methods=None):
22 self._tokenizer = tokenizer
22 self._tokenizer = tokenizer
23 self._elements = elements
23 self._elements = elements
24 self._methods = methods
24 self._methods = methods
25 def _advance(self):
25 def _advance(self):
26 'advance the tokenizer'
26 'advance the tokenizer'
27 t = self.current
27 t = self.current
28 try:
28 try:
29 self.current = self._iter.next()
29 self.current = self._iter.next()
30 except StopIteration:
30 except StopIteration:
31 pass
31 pass
32 return t
32 return t
33 def _match(self, m, pos):
33 def _match(self, m, pos):
34 'make sure the tokenizer matches an end condition'
34 'make sure the tokenizer matches an end condition'
35 if self.current[0] != m:
35 if self.current[0] != m:
36 raise error.ParseError("unexpected token: %s" % self.current[0],
36 raise error.ParseError("unexpected token: %s" % self.current[0],
37 self.current[2])
37 self.current[2])
38 self._advance()
38 self._advance()
39 def _parse(self, bind=0):
39 def _parse(self, bind=0):
40 token, value, pos = self._advance()
40 token, value, pos = self._advance()
41 # handle prefix rules on current token
41 # handle prefix rules on current token
42 prefix = self._elements[token][1]
42 prefix = self._elements[token][1]
43 if not prefix:
43 if not prefix:
44 raise error.ParseError("not a prefix: %s" % token, pos)
44 raise error.ParseError("not a prefix: %s" % token, pos)
45 if len(prefix) == 1:
45 if len(prefix) == 1:
46 expr = (prefix[0], value)
46 expr = (prefix[0], value)
47 else:
47 else:
48 if len(prefix) > 2 and prefix[2] == self.current[0]:
48 if len(prefix) > 2 and prefix[2] == self.current[0]:
49 self._match(prefix[2], pos)
49 self._match(prefix[2], pos)
50 expr = (prefix[0], None)
50 expr = (prefix[0], None)
51 else:
51 else:
52 expr = (prefix[0], self._parse(prefix[1]))
52 expr = (prefix[0], self._parse(prefix[1]))
53 if len(prefix) > 2:
53 if len(prefix) > 2:
54 self._match(prefix[2], pos)
54 self._match(prefix[2], pos)
55 # gather tokens until we meet a lower binding strength
55 # gather tokens until we meet a lower binding strength
56 while bind < self._elements[self.current[0]][0]:
56 while bind < self._elements[self.current[0]][0]:
57 token, value, pos = self._advance()
57 token, value, pos = self._advance()
58 e = self._elements[token]
58 e = self._elements[token]
59 # check for suffix - next token isn't a valid prefix
59 # check for suffix - next token isn't a valid prefix
60 if len(e) == 4 and not self._elements[self.current[0]][1]:
60 if len(e) == 4 and not self._elements[self.current[0]][1]:
61 suffix = e[3]
61 suffix = e[3]
62 expr = (suffix[0], expr)
62 expr = (suffix[0], expr)
63 else:
63 else:
64 # handle infix rules
64 # handle infix rules
65 infix = self._elements[token][2]
65 if len(e) < 3 or not e[2]:
66 raise error.ParseError("not an infix: %s" % token, pos)
67 infix = e[2]
66 if len(infix) == 3 and infix[2] == self.current[0]:
68 if len(infix) == 3 and infix[2] == self.current[0]:
67 self._match(infix[2], pos)
69 self._match(infix[2], pos)
68 expr = (infix[0], expr, (None))
70 expr = (infix[0], expr, (None))
69 else:
71 else:
70 if not infix[0]:
71 raise error.ParseError("not an infix: %s" % token, pos)
72 expr = (infix[0], expr, self._parse(infix[1]))
72 expr = (infix[0], expr, self._parse(infix[1]))
73 if len(infix) == 3:
73 if len(infix) == 3:
74 self._match(infix[2], pos)
74 self._match(infix[2], pos)
75 return expr
75 return expr
76 def parse(self, message):
76 def parse(self, message):
77 'generate a parse tree from a message'
77 'generate a parse tree from a message'
78 self._iter = self._tokenizer(message)
78 self._iter = self._tokenizer(message)
79 self.current = self._iter.next()
79 self.current = self._iter.next()
80 return self._parse()
80 return self._parse()
81 def eval(self, tree):
81 def eval(self, tree):
82 'recursively evaluate a parse tree using node methods'
82 'recursively evaluate a parse tree using node methods'
83 if not isinstance(tree, tuple):
83 if not isinstance(tree, tuple):
84 return tree
84 return tree
85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
86 def __call__(self, message):
86 def __call__(self, message):
87 'parse a message into a parse tree and evaluate if methods given'
87 'parse a message into a parse tree and evaluate if methods given'
88 t = self.parse(message)
88 t = self.parse(message)
89 if self._methods:
89 if self._methods:
90 return self.eval(t)
90 return self.eval(t)
91 return t
91 return t
General Comments 0
You need to be logged in to leave comments. Login now