##// END OF EJS Templates
parser: take suffix action if no infix action is defined...
Yuya Nishihara -
r25818:455190fb default
parent child Browse files
Show More
@@ -1,213 +1,213 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 15 # and suffix actions
16 16 # an action is a tree node name, a tree label, and an optional match
17 17 # __call__(program) parses program into a labeled tree
18 18
19 19 import error
20 20 from i18n import _
21 21
22 22 class parser(object):
23 23 def __init__(self, elements, methods=None):
24 24 self._elements = elements
25 25 self._methods = methods
26 26 self.current = None
27 27 def _advance(self):
28 28 'advance the tokenizer'
29 29 t = self.current
30 30 self.current = next(self._iter, None)
31 31 return t
32 32 def _hasnewterm(self):
33 33 'True if next token may start new term'
34 34 return any(self._elements[self.current[0]][1:3])
35 35 def _match(self, m):
36 36 'make sure the tokenizer matches an end condition'
37 37 if self.current[0] != m:
38 38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
39 39 self.current[2])
40 40 self._advance()
41 41 def _parseoperand(self, bind, m=None):
42 42 'gather right-hand-side operand until an end condition or binding met'
43 43 if m and self.current[0] == m:
44 44 expr = None
45 45 else:
46 46 expr = self._parse(bind)
47 47 if m:
48 48 self._match(m)
49 49 return expr
50 50 def _parse(self, bind=0):
51 51 token, value, pos = self._advance()
52 52 # handle prefix rules on current token, take as primary if unambiguous
53 53 primary, prefix = self._elements[token][1:3]
54 54 if primary and not (prefix and self._hasnewterm()):
55 55 expr = (primary, value)
56 56 elif prefix:
57 57 expr = (prefix[0], self._parseoperand(*prefix[1:]))
58 58 else:
59 59 raise error.ParseError(_("not a prefix: %s") % token, pos)
60 60 # gather tokens until we meet a lower binding strength
61 61 while bind < self._elements[self.current[0]][0]:
62 62 token, value, pos = self._advance()
63 63 # handle infix rules, take as suffix if unambiguous
64 64 infix, suffix = self._elements[token][3:]
65 if suffix and not self._hasnewterm():
65 if suffix and not (infix and self._hasnewterm()):
66 66 expr = (suffix[0], expr)
67 67 elif infix:
68 68 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
69 69 else:
70 70 raise error.ParseError(_("not an infix: %s") % token, pos)
71 71 return expr
72 72 def parse(self, tokeniter):
73 73 'generate a parse tree from tokens'
74 74 self._iter = tokeniter
75 75 self._advance()
76 76 res = self._parse()
77 77 token, value, pos = self.current
78 78 return res, pos
79 79 def eval(self, tree):
80 80 'recursively evaluate a parse tree using node methods'
81 81 if not isinstance(tree, tuple):
82 82 return tree
83 83 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
84 84 def __call__(self, tokeniter):
85 85 'parse tokens into a parse tree and evaluate if methods given'
86 86 t = self.parse(tokeniter)
87 87 if self._methods:
88 88 return self.eval(t)
89 89 return t
90 90
91 91 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
92 92 """Build dict from list containing positional and keyword arguments
93 93
94 94 Invalid keywords or too many positional arguments are rejected, but
95 95 missing arguments are just omitted.
96 96 """
97 97 if len(trees) > len(keys):
98 98 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
99 99 % {'func': funcname, 'nargs': len(keys)})
100 100 args = {}
101 101 # consume positional arguments
102 102 for k, x in zip(keys, trees):
103 103 if x[0] == keyvaluenode:
104 104 break
105 105 args[k] = x
106 106 # remainder should be keyword arguments
107 107 for x in trees[len(args):]:
108 108 if x[0] != keyvaluenode or x[1][0] != keynode:
109 109 raise error.ParseError(_("%(func)s got an invalid argument")
110 110 % {'func': funcname})
111 111 k = x[1][1]
112 112 if k not in keys:
113 113 raise error.ParseError(_("%(func)s got an unexpected keyword "
114 114 "argument '%(key)s'")
115 115 % {'func': funcname, 'key': k})
116 116 if k in args:
117 117 raise error.ParseError(_("%(func)s got multiple values for keyword "
118 118 "argument '%(key)s'")
119 119 % {'func': funcname, 'key': k})
120 120 args[k] = x[2]
121 121 return args
122 122
123 123 def _prettyformat(tree, leafnodes, level, lines):
124 124 if not isinstance(tree, tuple) or tree[0] in leafnodes:
125 125 lines.append((level, str(tree)))
126 126 else:
127 127 lines.append((level, '(%s' % tree[0]))
128 128 for s in tree[1:]:
129 129 _prettyformat(s, leafnodes, level + 1, lines)
130 130 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
131 131
132 132 def prettyformat(tree, leafnodes):
133 133 lines = []
134 134 _prettyformat(tree, leafnodes, 0, lines)
135 135 output = '\n'.join((' ' * l + s) for l, s in lines)
136 136 return output
137 137
138 138 def simplifyinfixops(tree, targetnodes):
139 139 """Flatten chained infix operations to reduce usage of Python stack
140 140
141 141 >>> def f(tree):
142 142 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
143 143 >>> f(('or',
144 144 ... ('or',
145 145 ... ('symbol', '1'),
146 146 ... ('symbol', '2')),
147 147 ... ('symbol', '3')))
148 148 (or
149 149 ('symbol', '1')
150 150 ('symbol', '2')
151 151 ('symbol', '3'))
152 152 >>> f(('func',
153 153 ... ('symbol', 'p1'),
154 154 ... ('or',
155 155 ... ('or',
156 156 ... ('func',
157 157 ... ('symbol', 'sort'),
158 158 ... ('list',
159 159 ... ('or',
160 160 ... ('or',
161 161 ... ('symbol', '1'),
162 162 ... ('symbol', '2')),
163 163 ... ('symbol', '3')),
164 164 ... ('negate',
165 165 ... ('symbol', 'rev')))),
166 166 ... ('and',
167 167 ... ('symbol', '4'),
168 168 ... ('group',
169 169 ... ('or',
170 170 ... ('or',
171 171 ... ('symbol', '5'),
172 172 ... ('symbol', '6')),
173 173 ... ('symbol', '7'))))),
174 174 ... ('symbol', '8'))))
175 175 (func
176 176 ('symbol', 'p1')
177 177 (or
178 178 (func
179 179 ('symbol', 'sort')
180 180 (list
181 181 (or
182 182 ('symbol', '1')
183 183 ('symbol', '2')
184 184 ('symbol', '3'))
185 185 (negate
186 186 ('symbol', 'rev'))))
187 187 (and
188 188 ('symbol', '4')
189 189 (group
190 190 (or
191 191 ('symbol', '5')
192 192 ('symbol', '6')
193 193 ('symbol', '7'))))
194 194 ('symbol', '8')))
195 195 """
196 196 if not isinstance(tree, tuple):
197 197 return tree
198 198 op = tree[0]
199 199 if op not in targetnodes:
200 200 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
201 201
202 202 # walk down left nodes taking each right node. no recursion to left nodes
203 203 # because infix operators are left-associative, i.e. left tree is deep.
204 204 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
205 205 simplified = []
206 206 x = tree
207 207 while x[0] == op:
208 208 l, r = x[1:]
209 209 simplified.append(simplifyinfixops(r, targetnodes))
210 210 x = l
211 211 simplified.append(simplifyinfixops(x, targetnodes))
212 212 simplified.append(op)
213 213 return tuple(reversed(simplified))
General Comments 0
You need to be logged in to leave comments. Login now