##// END OF EJS Templates
parser: reorder infix/suffix handling to be similar to prefix/primary flow...
Yuya Nishihara -
r25817:42ac9d1d default
parent child Browse files
Show More
@@ -1,214 +1,213 b''
1 # parser.py - simple top-down operator precedence parser for mercurial
1 # parser.py - simple top-down operator precedence parser for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 # for background
10 # for background
11
11
12 # takes a tokenizer and elements
12 # takes a tokenizer and elements
13 # tokenizer is an iterator that returns (type, value, pos) tuples
13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 # elements is a mapping of types to binding strength, primary, prefix, infix
14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 # and suffix actions
15 # and suffix actions
16 # an action is a tree node name, a tree label, and an optional match
16 # an action is a tree node name, a tree label, and an optional match
17 # __call__(program) parses program into a labeled tree
17 # __call__(program) parses program into a labeled tree
18
18
19 import error
19 import error
20 from i18n import _
20 from i18n import _
21
21
22 class parser(object):
22 class parser(object):
23 def __init__(self, elements, methods=None):
23 def __init__(self, elements, methods=None):
24 self._elements = elements
24 self._elements = elements
25 self._methods = methods
25 self._methods = methods
26 self.current = None
26 self.current = None
27 def _advance(self):
27 def _advance(self):
28 'advance the tokenizer'
28 'advance the tokenizer'
29 t = self.current
29 t = self.current
30 self.current = next(self._iter, None)
30 self.current = next(self._iter, None)
31 return t
31 return t
32 def _hasnewterm(self):
32 def _hasnewterm(self):
33 'True if next token may start new term'
33 'True if next token may start new term'
34 return any(self._elements[self.current[0]][1:3])
34 return any(self._elements[self.current[0]][1:3])
35 def _match(self, m):
35 def _match(self, m):
36 'make sure the tokenizer matches an end condition'
36 'make sure the tokenizer matches an end condition'
37 if self.current[0] != m:
37 if self.current[0] != m:
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
39 self.current[2])
39 self.current[2])
40 self._advance()
40 self._advance()
41 def _parseoperand(self, bind, m=None):
41 def _parseoperand(self, bind, m=None):
42 'gather right-hand-side operand until an end condition or binding met'
42 'gather right-hand-side operand until an end condition or binding met'
43 if m and self.current[0] == m:
43 if m and self.current[0] == m:
44 expr = None
44 expr = None
45 else:
45 else:
46 expr = self._parse(bind)
46 expr = self._parse(bind)
47 if m:
47 if m:
48 self._match(m)
48 self._match(m)
49 return expr
49 return expr
50 def _parse(self, bind=0):
50 def _parse(self, bind=0):
51 token, value, pos = self._advance()
51 token, value, pos = self._advance()
52 # handle prefix rules on current token, take as primary if unambiguous
52 # handle prefix rules on current token, take as primary if unambiguous
53 primary, prefix = self._elements[token][1:3]
53 primary, prefix = self._elements[token][1:3]
54 if primary and not (prefix and self._hasnewterm()):
54 if primary and not (prefix and self._hasnewterm()):
55 expr = (primary, value)
55 expr = (primary, value)
56 elif prefix:
56 elif prefix:
57 expr = (prefix[0], self._parseoperand(*prefix[1:]))
57 expr = (prefix[0], self._parseoperand(*prefix[1:]))
58 else:
58 else:
59 raise error.ParseError(_("not a prefix: %s") % token, pos)
59 raise error.ParseError(_("not a prefix: %s") % token, pos)
60 # gather tokens until we meet a lower binding strength
60 # gather tokens until we meet a lower binding strength
61 while bind < self._elements[self.current[0]][0]:
61 while bind < self._elements[self.current[0]][0]:
62 token, value, pos = self._advance()
62 token, value, pos = self._advance()
63 # handle infix rules, take as suffix if unambiguous
63 infix, suffix = self._elements[token][3:]
64 infix, suffix = self._elements[token][3:]
64 # check for suffix - next token isn't a valid prefix
65 if suffix and not self._hasnewterm():
65 if suffix and not self._hasnewterm():
66 expr = (suffix[0], expr)
66 expr = (suffix[0], expr)
67 elif infix:
68 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
67 else:
69 else:
68 # handle infix rules
70 raise error.ParseError(_("not an infix: %s") % token, pos)
69 if not infix:
70 raise error.ParseError(_("not an infix: %s") % token, pos)
71 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
72 return expr
71 return expr
73 def parse(self, tokeniter):
72 def parse(self, tokeniter):
74 'generate a parse tree from tokens'
73 'generate a parse tree from tokens'
75 self._iter = tokeniter
74 self._iter = tokeniter
76 self._advance()
75 self._advance()
77 res = self._parse()
76 res = self._parse()
78 token, value, pos = self.current
77 token, value, pos = self.current
79 return res, pos
78 return res, pos
80 def eval(self, tree):
79 def eval(self, tree):
81 'recursively evaluate a parse tree using node methods'
80 'recursively evaluate a parse tree using node methods'
82 if not isinstance(tree, tuple):
81 if not isinstance(tree, tuple):
83 return tree
82 return tree
84 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
83 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
85 def __call__(self, tokeniter):
84 def __call__(self, tokeniter):
86 'parse tokens into a parse tree and evaluate if methods given'
85 'parse tokens into a parse tree and evaluate if methods given'
87 t = self.parse(tokeniter)
86 t = self.parse(tokeniter)
88 if self._methods:
87 if self._methods:
89 return self.eval(t)
88 return self.eval(t)
90 return t
89 return t
91
90
92 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
91 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
93 """Build dict from list containing positional and keyword arguments
92 """Build dict from list containing positional and keyword arguments
94
93
95 Invalid keywords or too many positional arguments are rejected, but
94 Invalid keywords or too many positional arguments are rejected, but
96 missing arguments are just omitted.
95 missing arguments are just omitted.
97 """
96 """
98 if len(trees) > len(keys):
97 if len(trees) > len(keys):
99 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
98 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
100 % {'func': funcname, 'nargs': len(keys)})
99 % {'func': funcname, 'nargs': len(keys)})
101 args = {}
100 args = {}
102 # consume positional arguments
101 # consume positional arguments
103 for k, x in zip(keys, trees):
102 for k, x in zip(keys, trees):
104 if x[0] == keyvaluenode:
103 if x[0] == keyvaluenode:
105 break
104 break
106 args[k] = x
105 args[k] = x
107 # remainder should be keyword arguments
106 # remainder should be keyword arguments
108 for x in trees[len(args):]:
107 for x in trees[len(args):]:
109 if x[0] != keyvaluenode or x[1][0] != keynode:
108 if x[0] != keyvaluenode or x[1][0] != keynode:
110 raise error.ParseError(_("%(func)s got an invalid argument")
109 raise error.ParseError(_("%(func)s got an invalid argument")
111 % {'func': funcname})
110 % {'func': funcname})
112 k = x[1][1]
111 k = x[1][1]
113 if k not in keys:
112 if k not in keys:
114 raise error.ParseError(_("%(func)s got an unexpected keyword "
113 raise error.ParseError(_("%(func)s got an unexpected keyword "
115 "argument '%(key)s'")
114 "argument '%(key)s'")
116 % {'func': funcname, 'key': k})
115 % {'func': funcname, 'key': k})
117 if k in args:
116 if k in args:
118 raise error.ParseError(_("%(func)s got multiple values for keyword "
117 raise error.ParseError(_("%(func)s got multiple values for keyword "
119 "argument '%(key)s'")
118 "argument '%(key)s'")
120 % {'func': funcname, 'key': k})
119 % {'func': funcname, 'key': k})
121 args[k] = x[2]
120 args[k] = x[2]
122 return args
121 return args
123
122
124 def _prettyformat(tree, leafnodes, level, lines):
123 def _prettyformat(tree, leafnodes, level, lines):
125 if not isinstance(tree, tuple) or tree[0] in leafnodes:
124 if not isinstance(tree, tuple) or tree[0] in leafnodes:
126 lines.append((level, str(tree)))
125 lines.append((level, str(tree)))
127 else:
126 else:
128 lines.append((level, '(%s' % tree[0]))
127 lines.append((level, '(%s' % tree[0]))
129 for s in tree[1:]:
128 for s in tree[1:]:
130 _prettyformat(s, leafnodes, level + 1, lines)
129 _prettyformat(s, leafnodes, level + 1, lines)
131 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
130 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
132
131
133 def prettyformat(tree, leafnodes):
132 def prettyformat(tree, leafnodes):
134 lines = []
133 lines = []
135 _prettyformat(tree, leafnodes, 0, lines)
134 _prettyformat(tree, leafnodes, 0, lines)
136 output = '\n'.join((' ' * l + s) for l, s in lines)
135 output = '\n'.join((' ' * l + s) for l, s in lines)
137 return output
136 return output
138
137
139 def simplifyinfixops(tree, targetnodes):
138 def simplifyinfixops(tree, targetnodes):
140 """Flatten chained infix operations to reduce usage of Python stack
139 """Flatten chained infix operations to reduce usage of Python stack
141
140
142 >>> def f(tree):
141 >>> def f(tree):
143 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
142 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
144 >>> f(('or',
143 >>> f(('or',
145 ... ('or',
144 ... ('or',
146 ... ('symbol', '1'),
145 ... ('symbol', '1'),
147 ... ('symbol', '2')),
146 ... ('symbol', '2')),
148 ... ('symbol', '3')))
147 ... ('symbol', '3')))
149 (or
148 (or
150 ('symbol', '1')
149 ('symbol', '1')
151 ('symbol', '2')
150 ('symbol', '2')
152 ('symbol', '3'))
151 ('symbol', '3'))
153 >>> f(('func',
152 >>> f(('func',
154 ... ('symbol', 'p1'),
153 ... ('symbol', 'p1'),
155 ... ('or',
154 ... ('or',
156 ... ('or',
155 ... ('or',
157 ... ('func',
156 ... ('func',
158 ... ('symbol', 'sort'),
157 ... ('symbol', 'sort'),
159 ... ('list',
158 ... ('list',
160 ... ('or',
159 ... ('or',
161 ... ('or',
160 ... ('or',
162 ... ('symbol', '1'),
161 ... ('symbol', '1'),
163 ... ('symbol', '2')),
162 ... ('symbol', '2')),
164 ... ('symbol', '3')),
163 ... ('symbol', '3')),
165 ... ('negate',
164 ... ('negate',
166 ... ('symbol', 'rev')))),
165 ... ('symbol', 'rev')))),
167 ... ('and',
166 ... ('and',
168 ... ('symbol', '4'),
167 ... ('symbol', '4'),
169 ... ('group',
168 ... ('group',
170 ... ('or',
169 ... ('or',
171 ... ('or',
170 ... ('or',
172 ... ('symbol', '5'),
171 ... ('symbol', '5'),
173 ... ('symbol', '6')),
172 ... ('symbol', '6')),
174 ... ('symbol', '7'))))),
173 ... ('symbol', '7'))))),
175 ... ('symbol', '8'))))
174 ... ('symbol', '8'))))
176 (func
175 (func
177 ('symbol', 'p1')
176 ('symbol', 'p1')
178 (or
177 (or
179 (func
178 (func
180 ('symbol', 'sort')
179 ('symbol', 'sort')
181 (list
180 (list
182 (or
181 (or
183 ('symbol', '1')
182 ('symbol', '1')
184 ('symbol', '2')
183 ('symbol', '2')
185 ('symbol', '3'))
184 ('symbol', '3'))
186 (negate
185 (negate
187 ('symbol', 'rev'))))
186 ('symbol', 'rev'))))
188 (and
187 (and
189 ('symbol', '4')
188 ('symbol', '4')
190 (group
189 (group
191 (or
190 (or
192 ('symbol', '5')
191 ('symbol', '5')
193 ('symbol', '6')
192 ('symbol', '6')
194 ('symbol', '7'))))
193 ('symbol', '7'))))
195 ('symbol', '8')))
194 ('symbol', '8')))
196 """
195 """
197 if not isinstance(tree, tuple):
196 if not isinstance(tree, tuple):
198 return tree
197 return tree
199 op = tree[0]
198 op = tree[0]
200 if op not in targetnodes:
199 if op not in targetnodes:
201 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
200 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
202
201
203 # walk down left nodes taking each right node. no recursion to left nodes
202 # walk down left nodes taking each right node. no recursion to left nodes
204 # because infix operators are left-associative, i.e. left tree is deep.
203 # because infix operators are left-associative, i.e. left tree is deep.
205 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
204 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
206 simplified = []
205 simplified = []
207 x = tree
206 x = tree
208 while x[0] == op:
207 while x[0] == op:
209 l, r = x[1:]
208 l, r = x[1:]
210 simplified.append(simplifyinfixops(r, targetnodes))
209 simplified.append(simplifyinfixops(r, targetnodes))
211 x = l
210 x = l
212 simplified.append(simplifyinfixops(x, targetnodes))
211 simplified.append(simplifyinfixops(x, targetnodes))
213 simplified.append(op)
212 simplified.append(op)
214 return tuple(reversed(simplified))
213 return tuple(reversed(simplified))
General Comments 0
You need to be logged in to leave comments. Login now