##// END OF EJS Templates
parser: take suffix action if no infix action is defined...
Yuya Nishihara -
r25818:455190fb default
parent child Browse files
Show More
@@ -1,213 +1,213 b''
1 # parser.py - simple top-down operator precedence parser for mercurial
1 # parser.py - simple top-down operator precedence parser for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 # for background
10 # for background
11
11
12 # takes a tokenizer and elements
12 # takes a tokenizer and elements
13 # tokenizer is an iterator that returns (type, value, pos) tuples
13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 # elements is a mapping of types to binding strength, primary, prefix, infix
14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 # and suffix actions
15 # and suffix actions
16 # an action is a tree node name, a tree label, and an optional match
16 # an action is a tree node name, a tree label, and an optional match
17 # __call__(program) parses program into a labeled tree
17 # __call__(program) parses program into a labeled tree
18
18
19 import error
19 import error
20 from i18n import _
20 from i18n import _
21
21
22 class parser(object):
22 class parser(object):
23 def __init__(self, elements, methods=None):
23 def __init__(self, elements, methods=None):
24 self._elements = elements
24 self._elements = elements
25 self._methods = methods
25 self._methods = methods
26 self.current = None
26 self.current = None
27 def _advance(self):
27 def _advance(self):
28 'advance the tokenizer'
28 'advance the tokenizer'
29 t = self.current
29 t = self.current
30 self.current = next(self._iter, None)
30 self.current = next(self._iter, None)
31 return t
31 return t
32 def _hasnewterm(self):
32 def _hasnewterm(self):
33 'True if next token may start new term'
33 'True if next token may start new term'
34 return any(self._elements[self.current[0]][1:3])
34 return any(self._elements[self.current[0]][1:3])
35 def _match(self, m):
35 def _match(self, m):
36 'make sure the tokenizer matches an end condition'
36 'make sure the tokenizer matches an end condition'
37 if self.current[0] != m:
37 if self.current[0] != m:
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
39 self.current[2])
39 self.current[2])
40 self._advance()
40 self._advance()
41 def _parseoperand(self, bind, m=None):
41 def _parseoperand(self, bind, m=None):
42 'gather right-hand-side operand until an end condition or binding met'
42 'gather right-hand-side operand until an end condition or binding met'
43 if m and self.current[0] == m:
43 if m and self.current[0] == m:
44 expr = None
44 expr = None
45 else:
45 else:
46 expr = self._parse(bind)
46 expr = self._parse(bind)
47 if m:
47 if m:
48 self._match(m)
48 self._match(m)
49 return expr
49 return expr
50 def _parse(self, bind=0):
50 def _parse(self, bind=0):
51 token, value, pos = self._advance()
51 token, value, pos = self._advance()
52 # handle prefix rules on current token, take as primary if unambiguous
52 # handle prefix rules on current token, take as primary if unambiguous
53 primary, prefix = self._elements[token][1:3]
53 primary, prefix = self._elements[token][1:3]
54 if primary and not (prefix and self._hasnewterm()):
54 if primary and not (prefix and self._hasnewterm()):
55 expr = (primary, value)
55 expr = (primary, value)
56 elif prefix:
56 elif prefix:
57 expr = (prefix[0], self._parseoperand(*prefix[1:]))
57 expr = (prefix[0], self._parseoperand(*prefix[1:]))
58 else:
58 else:
59 raise error.ParseError(_("not a prefix: %s") % token, pos)
59 raise error.ParseError(_("not a prefix: %s") % token, pos)
60 # gather tokens until we meet a lower binding strength
60 # gather tokens until we meet a lower binding strength
61 while bind < self._elements[self.current[0]][0]:
61 while bind < self._elements[self.current[0]][0]:
62 token, value, pos = self._advance()
62 token, value, pos = self._advance()
63 # handle infix rules, take as suffix if unambiguous
63 # handle infix rules, take as suffix if unambiguous
64 infix, suffix = self._elements[token][3:]
64 infix, suffix = self._elements[token][3:]
65 if suffix and not self._hasnewterm():
65 if suffix and not (infix and self._hasnewterm()):
66 expr = (suffix[0], expr)
66 expr = (suffix[0], expr)
67 elif infix:
67 elif infix:
68 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
68 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
69 else:
69 else:
70 raise error.ParseError(_("not an infix: %s") % token, pos)
70 raise error.ParseError(_("not an infix: %s") % token, pos)
71 return expr
71 return expr
72 def parse(self, tokeniter):
72 def parse(self, tokeniter):
73 'generate a parse tree from tokens'
73 'generate a parse tree from tokens'
74 self._iter = tokeniter
74 self._iter = tokeniter
75 self._advance()
75 self._advance()
76 res = self._parse()
76 res = self._parse()
77 token, value, pos = self.current
77 token, value, pos = self.current
78 return res, pos
78 return res, pos
79 def eval(self, tree):
79 def eval(self, tree):
80 'recursively evaluate a parse tree using node methods'
80 'recursively evaluate a parse tree using node methods'
81 if not isinstance(tree, tuple):
81 if not isinstance(tree, tuple):
82 return tree
82 return tree
83 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
83 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
84 def __call__(self, tokeniter):
84 def __call__(self, tokeniter):
85 'parse tokens into a parse tree and evaluate if methods given'
85 'parse tokens into a parse tree and evaluate if methods given'
86 t = self.parse(tokeniter)
86 t = self.parse(tokeniter)
87 if self._methods:
87 if self._methods:
88 return self.eval(t)
88 return self.eval(t)
89 return t
89 return t
90
90
91 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
91 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
92 """Build dict from list containing positional and keyword arguments
92 """Build dict from list containing positional and keyword arguments
93
93
94 Invalid keywords or too many positional arguments are rejected, but
94 Invalid keywords or too many positional arguments are rejected, but
95 missing arguments are just omitted.
95 missing arguments are just omitted.
96 """
96 """
97 if len(trees) > len(keys):
97 if len(trees) > len(keys):
98 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
98 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
99 % {'func': funcname, 'nargs': len(keys)})
99 % {'func': funcname, 'nargs': len(keys)})
100 args = {}
100 args = {}
101 # consume positional arguments
101 # consume positional arguments
102 for k, x in zip(keys, trees):
102 for k, x in zip(keys, trees):
103 if x[0] == keyvaluenode:
103 if x[0] == keyvaluenode:
104 break
104 break
105 args[k] = x
105 args[k] = x
106 # remainder should be keyword arguments
106 # remainder should be keyword arguments
107 for x in trees[len(args):]:
107 for x in trees[len(args):]:
108 if x[0] != keyvaluenode or x[1][0] != keynode:
108 if x[0] != keyvaluenode or x[1][0] != keynode:
109 raise error.ParseError(_("%(func)s got an invalid argument")
109 raise error.ParseError(_("%(func)s got an invalid argument")
110 % {'func': funcname})
110 % {'func': funcname})
111 k = x[1][1]
111 k = x[1][1]
112 if k not in keys:
112 if k not in keys:
113 raise error.ParseError(_("%(func)s got an unexpected keyword "
113 raise error.ParseError(_("%(func)s got an unexpected keyword "
114 "argument '%(key)s'")
114 "argument '%(key)s'")
115 % {'func': funcname, 'key': k})
115 % {'func': funcname, 'key': k})
116 if k in args:
116 if k in args:
117 raise error.ParseError(_("%(func)s got multiple values for keyword "
117 raise error.ParseError(_("%(func)s got multiple values for keyword "
118 "argument '%(key)s'")
118 "argument '%(key)s'")
119 % {'func': funcname, 'key': k})
119 % {'func': funcname, 'key': k})
120 args[k] = x[2]
120 args[k] = x[2]
121 return args
121 return args
122
122
123 def _prettyformat(tree, leafnodes, level, lines):
123 def _prettyformat(tree, leafnodes, level, lines):
124 if not isinstance(tree, tuple) or tree[0] in leafnodes:
124 if not isinstance(tree, tuple) or tree[0] in leafnodes:
125 lines.append((level, str(tree)))
125 lines.append((level, str(tree)))
126 else:
126 else:
127 lines.append((level, '(%s' % tree[0]))
127 lines.append((level, '(%s' % tree[0]))
128 for s in tree[1:]:
128 for s in tree[1:]:
129 _prettyformat(s, leafnodes, level + 1, lines)
129 _prettyformat(s, leafnodes, level + 1, lines)
130 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
130 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
131
131
132 def prettyformat(tree, leafnodes):
132 def prettyformat(tree, leafnodes):
133 lines = []
133 lines = []
134 _prettyformat(tree, leafnodes, 0, lines)
134 _prettyformat(tree, leafnodes, 0, lines)
135 output = '\n'.join((' ' * l + s) for l, s in lines)
135 output = '\n'.join((' ' * l + s) for l, s in lines)
136 return output
136 return output
137
137
138 def simplifyinfixops(tree, targetnodes):
138 def simplifyinfixops(tree, targetnodes):
139 """Flatten chained infix operations to reduce usage of Python stack
139 """Flatten chained infix operations to reduce usage of Python stack
140
140
141 >>> def f(tree):
141 >>> def f(tree):
142 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
142 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
143 >>> f(('or',
143 >>> f(('or',
144 ... ('or',
144 ... ('or',
145 ... ('symbol', '1'),
145 ... ('symbol', '1'),
146 ... ('symbol', '2')),
146 ... ('symbol', '2')),
147 ... ('symbol', '3')))
147 ... ('symbol', '3')))
148 (or
148 (or
149 ('symbol', '1')
149 ('symbol', '1')
150 ('symbol', '2')
150 ('symbol', '2')
151 ('symbol', '3'))
151 ('symbol', '3'))
152 >>> f(('func',
152 >>> f(('func',
153 ... ('symbol', 'p1'),
153 ... ('symbol', 'p1'),
154 ... ('or',
154 ... ('or',
155 ... ('or',
155 ... ('or',
156 ... ('func',
156 ... ('func',
157 ... ('symbol', 'sort'),
157 ... ('symbol', 'sort'),
158 ... ('list',
158 ... ('list',
159 ... ('or',
159 ... ('or',
160 ... ('or',
160 ... ('or',
161 ... ('symbol', '1'),
161 ... ('symbol', '1'),
162 ... ('symbol', '2')),
162 ... ('symbol', '2')),
163 ... ('symbol', '3')),
163 ... ('symbol', '3')),
164 ... ('negate',
164 ... ('negate',
165 ... ('symbol', 'rev')))),
165 ... ('symbol', 'rev')))),
166 ... ('and',
166 ... ('and',
167 ... ('symbol', '4'),
167 ... ('symbol', '4'),
168 ... ('group',
168 ... ('group',
169 ... ('or',
169 ... ('or',
170 ... ('or',
170 ... ('or',
171 ... ('symbol', '5'),
171 ... ('symbol', '5'),
172 ... ('symbol', '6')),
172 ... ('symbol', '6')),
173 ... ('symbol', '7'))))),
173 ... ('symbol', '7'))))),
174 ... ('symbol', '8'))))
174 ... ('symbol', '8'))))
175 (func
175 (func
176 ('symbol', 'p1')
176 ('symbol', 'p1')
177 (or
177 (or
178 (func
178 (func
179 ('symbol', 'sort')
179 ('symbol', 'sort')
180 (list
180 (list
181 (or
181 (or
182 ('symbol', '1')
182 ('symbol', '1')
183 ('symbol', '2')
183 ('symbol', '2')
184 ('symbol', '3'))
184 ('symbol', '3'))
185 (negate
185 (negate
186 ('symbol', 'rev'))))
186 ('symbol', 'rev'))))
187 (and
187 (and
188 ('symbol', '4')
188 ('symbol', '4')
189 (group
189 (group
190 (or
190 (or
191 ('symbol', '5')
191 ('symbol', '5')
192 ('symbol', '6')
192 ('symbol', '6')
193 ('symbol', '7'))))
193 ('symbol', '7'))))
194 ('symbol', '8')))
194 ('symbol', '8')))
195 """
195 """
196 if not isinstance(tree, tuple):
196 if not isinstance(tree, tuple):
197 return tree
197 return tree
198 op = tree[0]
198 op = tree[0]
199 if op not in targetnodes:
199 if op not in targetnodes:
200 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
200 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
201
201
202 # walk down left nodes taking each right node. no recursion to left nodes
202 # walk down left nodes taking each right node. no recursion to left nodes
203 # because infix operators are left-associative, i.e. left tree is deep.
203 # because infix operators are left-associative, i.e. left tree is deep.
204 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
204 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
205 simplified = []
205 simplified = []
206 x = tree
206 x = tree
207 while x[0] == op:
207 while x[0] == op:
208 l, r = x[1:]
208 l, r = x[1:]
209 simplified.append(simplifyinfixops(r, targetnodes))
209 simplified.append(simplifyinfixops(r, targetnodes))
210 x = l
210 x = l
211 simplified.append(simplifyinfixops(x, targetnodes))
211 simplified.append(simplifyinfixops(x, targetnodes))
212 simplified.append(op)
212 simplified.append(op)
213 return tuple(reversed(simplified))
213 return tuple(reversed(simplified))
General Comments 0
You need to be logged in to leave comments. Login now