##// END OF EJS Templates
parser: extract function that tests if next token may start new term...
Yuya Nishihara -
r25804:f0a77cb6 default
parent child Browse files
Show More
@@ -1,211 +1,214 b''
1 # parser.py - simple top-down operator precedence parser for mercurial
1 # parser.py - simple top-down operator precedence parser for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 # for background
10 # for background
11
11
12 # takes a tokenizer and elements
12 # takes a tokenizer and elements
13 # tokenizer is an iterator that returns (type, value, pos) tuples
13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 # elements is a mapping of types to binding strength, prefix, infix and
14 # elements is a mapping of types to binding strength, prefix, infix and
15 # suffix actions
15 # suffix actions
16 # an action is a tree node name, a tree label, and an optional match
16 # an action is a tree node name, a tree label, and an optional match
17 # __call__(program) parses program into a labeled tree
17 # __call__(program) parses program into a labeled tree
18
18
19 import error
19 import error
20 from i18n import _
20 from i18n import _
21
21
22 class parser(object):
22 class parser(object):
23 def __init__(self, elements, methods=None):
23 def __init__(self, elements, methods=None):
24 self._elements = elements
24 self._elements = elements
25 self._methods = methods
25 self._methods = methods
26 self.current = None
26 self.current = None
27 def _advance(self):
27 def _advance(self):
28 'advance the tokenizer'
28 'advance the tokenizer'
29 t = self.current
29 t = self.current
30 self.current = next(self._iter, None)
30 self.current = next(self._iter, None)
31 return t
31 return t
32 def _hasnewterm(self):
33 'True if next token may start new term'
34 return bool(self._elements[self.current[0]][1])
32 def _match(self, m):
35 def _match(self, m):
33 'make sure the tokenizer matches an end condition'
36 'make sure the tokenizer matches an end condition'
34 if self.current[0] != m:
37 if self.current[0] != m:
35 raise error.ParseError(_("unexpected token: %s") % self.current[0],
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
36 self.current[2])
39 self.current[2])
37 self._advance()
40 self._advance()
38 def _parseoperand(self, bind, m=None):
41 def _parseoperand(self, bind, m=None):
39 'gather right-hand-side operand until an end condition or binding met'
42 'gather right-hand-side operand until an end condition or binding met'
40 if m and self.current[0] == m:
43 if m and self.current[0] == m:
41 expr = None
44 expr = None
42 else:
45 else:
43 expr = self._parse(bind)
46 expr = self._parse(bind)
44 if m:
47 if m:
45 self._match(m)
48 self._match(m)
46 return expr
49 return expr
47 def _parse(self, bind=0):
50 def _parse(self, bind=0):
48 token, value, pos = self._advance()
51 token, value, pos = self._advance()
49 # handle prefix rules on current token
52 # handle prefix rules on current token
50 prefix = self._elements[token][1]
53 prefix = self._elements[token][1]
51 if not prefix:
54 if not prefix:
52 raise error.ParseError(_("not a prefix: %s") % token, pos)
55 raise error.ParseError(_("not a prefix: %s") % token, pos)
53 if len(prefix) == 1:
56 if len(prefix) == 1:
54 expr = (prefix[0], value)
57 expr = (prefix[0], value)
55 else:
58 else:
56 expr = (prefix[0], self._parseoperand(*prefix[1:]))
59 expr = (prefix[0], self._parseoperand(*prefix[1:]))
57 # gather tokens until we meet a lower binding strength
60 # gather tokens until we meet a lower binding strength
58 while bind < self._elements[self.current[0]][0]:
61 while bind < self._elements[self.current[0]][0]:
59 token, value, pos = self._advance()
62 token, value, pos = self._advance()
60 infix, suffix = self._elements[token][2:]
63 infix, suffix = self._elements[token][2:]
61 # check for suffix - next token isn't a valid prefix
64 # check for suffix - next token isn't a valid prefix
62 if suffix and not self._elements[self.current[0]][1]:
65 if suffix and not self._hasnewterm():
63 expr = (suffix[0], expr)
66 expr = (suffix[0], expr)
64 else:
67 else:
65 # handle infix rules
68 # handle infix rules
66 if not infix:
69 if not infix:
67 raise error.ParseError(_("not an infix: %s") % token, pos)
70 raise error.ParseError(_("not an infix: %s") % token, pos)
68 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
71 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
69 return expr
72 return expr
70 def parse(self, tokeniter):
73 def parse(self, tokeniter):
71 'generate a parse tree from tokens'
74 'generate a parse tree from tokens'
72 self._iter = tokeniter
75 self._iter = tokeniter
73 self._advance()
76 self._advance()
74 res = self._parse()
77 res = self._parse()
75 token, value, pos = self.current
78 token, value, pos = self.current
76 return res, pos
79 return res, pos
77 def eval(self, tree):
80 def eval(self, tree):
78 'recursively evaluate a parse tree using node methods'
81 'recursively evaluate a parse tree using node methods'
79 if not isinstance(tree, tuple):
82 if not isinstance(tree, tuple):
80 return tree
83 return tree
81 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
84 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
82 def __call__(self, tokeniter):
85 def __call__(self, tokeniter):
83 'parse tokens into a parse tree and evaluate if methods given'
86 'parse tokens into a parse tree and evaluate if methods given'
84 t = self.parse(tokeniter)
87 t = self.parse(tokeniter)
85 if self._methods:
88 if self._methods:
86 return self.eval(t)
89 return self.eval(t)
87 return t
90 return t
88
91
89 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
92 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
90 """Build dict from list containing positional and keyword arguments
93 """Build dict from list containing positional and keyword arguments
91
94
92 Invalid keywords or too many positional arguments are rejected, but
95 Invalid keywords or too many positional arguments are rejected, but
93 missing arguments are just omitted.
96 missing arguments are just omitted.
94 """
97 """
95 if len(trees) > len(keys):
98 if len(trees) > len(keys):
96 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
99 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
97 % {'func': funcname, 'nargs': len(keys)})
100 % {'func': funcname, 'nargs': len(keys)})
98 args = {}
101 args = {}
99 # consume positional arguments
102 # consume positional arguments
100 for k, x in zip(keys, trees):
103 for k, x in zip(keys, trees):
101 if x[0] == keyvaluenode:
104 if x[0] == keyvaluenode:
102 break
105 break
103 args[k] = x
106 args[k] = x
104 # remainder should be keyword arguments
107 # remainder should be keyword arguments
105 for x in trees[len(args):]:
108 for x in trees[len(args):]:
106 if x[0] != keyvaluenode or x[1][0] != keynode:
109 if x[0] != keyvaluenode or x[1][0] != keynode:
107 raise error.ParseError(_("%(func)s got an invalid argument")
110 raise error.ParseError(_("%(func)s got an invalid argument")
108 % {'func': funcname})
111 % {'func': funcname})
109 k = x[1][1]
112 k = x[1][1]
110 if k not in keys:
113 if k not in keys:
111 raise error.ParseError(_("%(func)s got an unexpected keyword "
114 raise error.ParseError(_("%(func)s got an unexpected keyword "
112 "argument '%(key)s'")
115 "argument '%(key)s'")
113 % {'func': funcname, 'key': k})
116 % {'func': funcname, 'key': k})
114 if k in args:
117 if k in args:
115 raise error.ParseError(_("%(func)s got multiple values for keyword "
118 raise error.ParseError(_("%(func)s got multiple values for keyword "
116 "argument '%(key)s'")
119 "argument '%(key)s'")
117 % {'func': funcname, 'key': k})
120 % {'func': funcname, 'key': k})
118 args[k] = x[2]
121 args[k] = x[2]
119 return args
122 return args
120
123
121 def _prettyformat(tree, leafnodes, level, lines):
124 def _prettyformat(tree, leafnodes, level, lines):
122 if not isinstance(tree, tuple) or tree[0] in leafnodes:
125 if not isinstance(tree, tuple) or tree[0] in leafnodes:
123 lines.append((level, str(tree)))
126 lines.append((level, str(tree)))
124 else:
127 else:
125 lines.append((level, '(%s' % tree[0]))
128 lines.append((level, '(%s' % tree[0]))
126 for s in tree[1:]:
129 for s in tree[1:]:
127 _prettyformat(s, leafnodes, level + 1, lines)
130 _prettyformat(s, leafnodes, level + 1, lines)
128 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
131 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
129
132
130 def prettyformat(tree, leafnodes):
133 def prettyformat(tree, leafnodes):
131 lines = []
134 lines = []
132 _prettyformat(tree, leafnodes, 0, lines)
135 _prettyformat(tree, leafnodes, 0, lines)
133 output = '\n'.join((' ' * l + s) for l, s in lines)
136 output = '\n'.join((' ' * l + s) for l, s in lines)
134 return output
137 return output
135
138
136 def simplifyinfixops(tree, targetnodes):
139 def simplifyinfixops(tree, targetnodes):
137 """Flatten chained infix operations to reduce usage of Python stack
140 """Flatten chained infix operations to reduce usage of Python stack
138
141
139 >>> def f(tree):
142 >>> def f(tree):
140 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
143 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
141 >>> f(('or',
144 >>> f(('or',
142 ... ('or',
145 ... ('or',
143 ... ('symbol', '1'),
146 ... ('symbol', '1'),
144 ... ('symbol', '2')),
147 ... ('symbol', '2')),
145 ... ('symbol', '3')))
148 ... ('symbol', '3')))
146 (or
149 (or
147 ('symbol', '1')
150 ('symbol', '1')
148 ('symbol', '2')
151 ('symbol', '2')
149 ('symbol', '3'))
152 ('symbol', '3'))
150 >>> f(('func',
153 >>> f(('func',
151 ... ('symbol', 'p1'),
154 ... ('symbol', 'p1'),
152 ... ('or',
155 ... ('or',
153 ... ('or',
156 ... ('or',
154 ... ('func',
157 ... ('func',
155 ... ('symbol', 'sort'),
158 ... ('symbol', 'sort'),
156 ... ('list',
159 ... ('list',
157 ... ('or',
160 ... ('or',
158 ... ('or',
161 ... ('or',
159 ... ('symbol', '1'),
162 ... ('symbol', '1'),
160 ... ('symbol', '2')),
163 ... ('symbol', '2')),
161 ... ('symbol', '3')),
164 ... ('symbol', '3')),
162 ... ('negate',
165 ... ('negate',
163 ... ('symbol', 'rev')))),
166 ... ('symbol', 'rev')))),
164 ... ('and',
167 ... ('and',
165 ... ('symbol', '4'),
168 ... ('symbol', '4'),
166 ... ('group',
169 ... ('group',
167 ... ('or',
170 ... ('or',
168 ... ('or',
171 ... ('or',
169 ... ('symbol', '5'),
172 ... ('symbol', '5'),
170 ... ('symbol', '6')),
173 ... ('symbol', '6')),
171 ... ('symbol', '7'))))),
174 ... ('symbol', '7'))))),
172 ... ('symbol', '8'))))
175 ... ('symbol', '8'))))
173 (func
176 (func
174 ('symbol', 'p1')
177 ('symbol', 'p1')
175 (or
178 (or
176 (func
179 (func
177 ('symbol', 'sort')
180 ('symbol', 'sort')
178 (list
181 (list
179 (or
182 (or
180 ('symbol', '1')
183 ('symbol', '1')
181 ('symbol', '2')
184 ('symbol', '2')
182 ('symbol', '3'))
185 ('symbol', '3'))
183 (negate
186 (negate
184 ('symbol', 'rev'))))
187 ('symbol', 'rev'))))
185 (and
188 (and
186 ('symbol', '4')
189 ('symbol', '4')
187 (group
190 (group
188 (or
191 (or
189 ('symbol', '5')
192 ('symbol', '5')
190 ('symbol', '6')
193 ('symbol', '6')
191 ('symbol', '7'))))
194 ('symbol', '7'))))
192 ('symbol', '8')))
195 ('symbol', '8')))
193 """
196 """
194 if not isinstance(tree, tuple):
197 if not isinstance(tree, tuple):
195 return tree
198 return tree
196 op = tree[0]
199 op = tree[0]
197 if op not in targetnodes:
200 if op not in targetnodes:
198 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
201 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
199
202
200 # walk down left nodes taking each right node. no recursion to left nodes
203 # walk down left nodes taking each right node. no recursion to left nodes
201 # because infix operators are left-associative, i.e. left tree is deep.
204 # because infix operators are left-associative, i.e. left tree is deep.
202 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
205 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
203 simplified = []
206 simplified = []
204 x = tree
207 x = tree
205 while x[0] == op:
208 while x[0] == op:
206 l, r = x[1:]
209 l, r = x[1:]
207 simplified.append(simplifyinfixops(r, targetnodes))
210 simplified.append(simplifyinfixops(r, targetnodes))
208 x = l
211 x = l
209 simplified.append(simplifyinfixops(x, targetnodes))
212 simplified.append(simplifyinfixops(x, targetnodes))
210 simplified.append(op)
213 simplified.append(op)
211 return tuple(reversed(simplified))
214 return tuple(reversed(simplified))
General Comments 0
You need to be logged in to leave comments. Login now