##// END OF EJS Templates
parser: use absolute_import
Gregory Szorc -
r25963:7448df70 default
parent child Browse files
Show More
@@ -1,213 +1,215 b''
1 # parser.py - simple top-down operator precedence parser for mercurial
1 # parser.py - simple top-down operator precedence parser for mercurial
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 # for background
10 # for background
11
11
12 # takes a tokenizer and elements
12 # takes a tokenizer and elements
13 # tokenizer is an iterator that returns (type, value, pos) tuples
13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 # elements is a mapping of types to binding strength, primary, prefix, infix
14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 # and suffix actions
15 # and suffix actions
16 # an action is a tree node name, a tree label, and an optional match
16 # an action is a tree node name, a tree label, and an optional match
17 # __call__(program) parses program into a labeled tree
17 # __call__(program) parses program into a labeled tree
18
18
19 import error
19 from __future__ import absolute_import
20 from i18n import _
20
21 from .i18n import _
22 from . import error
21
23
22 class parser(object):
24 class parser(object):
23 def __init__(self, elements, methods=None):
25 def __init__(self, elements, methods=None):
24 self._elements = elements
26 self._elements = elements
25 self._methods = methods
27 self._methods = methods
26 self.current = None
28 self.current = None
27 def _advance(self):
29 def _advance(self):
28 'advance the tokenizer'
30 'advance the tokenizer'
29 t = self.current
31 t = self.current
30 self.current = next(self._iter, None)
32 self.current = next(self._iter, None)
31 return t
33 return t
32 def _hasnewterm(self):
34 def _hasnewterm(self):
33 'True if next token may start new term'
35 'True if next token may start new term'
34 return any(self._elements[self.current[0]][1:3])
36 return any(self._elements[self.current[0]][1:3])
35 def _match(self, m):
37 def _match(self, m):
36 'make sure the tokenizer matches an end condition'
38 'make sure the tokenizer matches an end condition'
37 if self.current[0] != m:
39 if self.current[0] != m:
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
40 raise error.ParseError(_("unexpected token: %s") % self.current[0],
39 self.current[2])
41 self.current[2])
40 self._advance()
42 self._advance()
41 def _parseoperand(self, bind, m=None):
43 def _parseoperand(self, bind, m=None):
42 'gather right-hand-side operand until an end condition or binding met'
44 'gather right-hand-side operand until an end condition or binding met'
43 if m and self.current[0] == m:
45 if m and self.current[0] == m:
44 expr = None
46 expr = None
45 else:
47 else:
46 expr = self._parse(bind)
48 expr = self._parse(bind)
47 if m:
49 if m:
48 self._match(m)
50 self._match(m)
49 return expr
51 return expr
50 def _parse(self, bind=0):
52 def _parse(self, bind=0):
51 token, value, pos = self._advance()
53 token, value, pos = self._advance()
52 # handle prefix rules on current token, take as primary if unambiguous
54 # handle prefix rules on current token, take as primary if unambiguous
53 primary, prefix = self._elements[token][1:3]
55 primary, prefix = self._elements[token][1:3]
54 if primary and not (prefix and self._hasnewterm()):
56 if primary and not (prefix and self._hasnewterm()):
55 expr = (primary, value)
57 expr = (primary, value)
56 elif prefix:
58 elif prefix:
57 expr = (prefix[0], self._parseoperand(*prefix[1:]))
59 expr = (prefix[0], self._parseoperand(*prefix[1:]))
58 else:
60 else:
59 raise error.ParseError(_("not a prefix: %s") % token, pos)
61 raise error.ParseError(_("not a prefix: %s") % token, pos)
60 # gather tokens until we meet a lower binding strength
62 # gather tokens until we meet a lower binding strength
61 while bind < self._elements[self.current[0]][0]:
63 while bind < self._elements[self.current[0]][0]:
62 token, value, pos = self._advance()
64 token, value, pos = self._advance()
63 # handle infix rules, take as suffix if unambiguous
65 # handle infix rules, take as suffix if unambiguous
64 infix, suffix = self._elements[token][3:]
66 infix, suffix = self._elements[token][3:]
65 if suffix and not (infix and self._hasnewterm()):
67 if suffix and not (infix and self._hasnewterm()):
66 expr = (suffix[0], expr)
68 expr = (suffix[0], expr)
67 elif infix:
69 elif infix:
68 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
70 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
69 else:
71 else:
70 raise error.ParseError(_("not an infix: %s") % token, pos)
72 raise error.ParseError(_("not an infix: %s") % token, pos)
71 return expr
73 return expr
72 def parse(self, tokeniter):
74 def parse(self, tokeniter):
73 'generate a parse tree from tokens'
75 'generate a parse tree from tokens'
74 self._iter = tokeniter
76 self._iter = tokeniter
75 self._advance()
77 self._advance()
76 res = self._parse()
78 res = self._parse()
77 token, value, pos = self.current
79 token, value, pos = self.current
78 return res, pos
80 return res, pos
79 def eval(self, tree):
81 def eval(self, tree):
80 'recursively evaluate a parse tree using node methods'
82 'recursively evaluate a parse tree using node methods'
81 if not isinstance(tree, tuple):
83 if not isinstance(tree, tuple):
82 return tree
84 return tree
83 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
84 def __call__(self, tokeniter):
86 def __call__(self, tokeniter):
85 'parse tokens into a parse tree and evaluate if methods given'
87 'parse tokens into a parse tree and evaluate if methods given'
86 t = self.parse(tokeniter)
88 t = self.parse(tokeniter)
87 if self._methods:
89 if self._methods:
88 return self.eval(t)
90 return self.eval(t)
89 return t
91 return t
90
92
91 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
93 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
92 """Build dict from list containing positional and keyword arguments
94 """Build dict from list containing positional and keyword arguments
93
95
94 Invalid keywords or too many positional arguments are rejected, but
96 Invalid keywords or too many positional arguments are rejected, but
95 missing arguments are just omitted.
97 missing arguments are just omitted.
96 """
98 """
97 if len(trees) > len(keys):
99 if len(trees) > len(keys):
98 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
100 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
99 % {'func': funcname, 'nargs': len(keys)})
101 % {'func': funcname, 'nargs': len(keys)})
100 args = {}
102 args = {}
101 # consume positional arguments
103 # consume positional arguments
102 for k, x in zip(keys, trees):
104 for k, x in zip(keys, trees):
103 if x[0] == keyvaluenode:
105 if x[0] == keyvaluenode:
104 break
106 break
105 args[k] = x
107 args[k] = x
106 # remainder should be keyword arguments
108 # remainder should be keyword arguments
107 for x in trees[len(args):]:
109 for x in trees[len(args):]:
108 if x[0] != keyvaluenode or x[1][0] != keynode:
110 if x[0] != keyvaluenode or x[1][0] != keynode:
109 raise error.ParseError(_("%(func)s got an invalid argument")
111 raise error.ParseError(_("%(func)s got an invalid argument")
110 % {'func': funcname})
112 % {'func': funcname})
111 k = x[1][1]
113 k = x[1][1]
112 if k not in keys:
114 if k not in keys:
113 raise error.ParseError(_("%(func)s got an unexpected keyword "
115 raise error.ParseError(_("%(func)s got an unexpected keyword "
114 "argument '%(key)s'")
116 "argument '%(key)s'")
115 % {'func': funcname, 'key': k})
117 % {'func': funcname, 'key': k})
116 if k in args:
118 if k in args:
117 raise error.ParseError(_("%(func)s got multiple values for keyword "
119 raise error.ParseError(_("%(func)s got multiple values for keyword "
118 "argument '%(key)s'")
120 "argument '%(key)s'")
119 % {'func': funcname, 'key': k})
121 % {'func': funcname, 'key': k})
120 args[k] = x[2]
122 args[k] = x[2]
121 return args
123 return args
122
124
123 def _prettyformat(tree, leafnodes, level, lines):
125 def _prettyformat(tree, leafnodes, level, lines):
124 if not isinstance(tree, tuple) or tree[0] in leafnodes:
126 if not isinstance(tree, tuple) or tree[0] in leafnodes:
125 lines.append((level, str(tree)))
127 lines.append((level, str(tree)))
126 else:
128 else:
127 lines.append((level, '(%s' % tree[0]))
129 lines.append((level, '(%s' % tree[0]))
128 for s in tree[1:]:
130 for s in tree[1:]:
129 _prettyformat(s, leafnodes, level + 1, lines)
131 _prettyformat(s, leafnodes, level + 1, lines)
130 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
132 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
131
133
132 def prettyformat(tree, leafnodes):
134 def prettyformat(tree, leafnodes):
133 lines = []
135 lines = []
134 _prettyformat(tree, leafnodes, 0, lines)
136 _prettyformat(tree, leafnodes, 0, lines)
135 output = '\n'.join((' ' * l + s) for l, s in lines)
137 output = '\n'.join((' ' * l + s) for l, s in lines)
136 return output
138 return output
137
139
138 def simplifyinfixops(tree, targetnodes):
140 def simplifyinfixops(tree, targetnodes):
139 """Flatten chained infix operations to reduce usage of Python stack
141 """Flatten chained infix operations to reduce usage of Python stack
140
142
141 >>> def f(tree):
143 >>> def f(tree):
142 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
144 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
143 >>> f(('or',
145 >>> f(('or',
144 ... ('or',
146 ... ('or',
145 ... ('symbol', '1'),
147 ... ('symbol', '1'),
146 ... ('symbol', '2')),
148 ... ('symbol', '2')),
147 ... ('symbol', '3')))
149 ... ('symbol', '3')))
148 (or
150 (or
149 ('symbol', '1')
151 ('symbol', '1')
150 ('symbol', '2')
152 ('symbol', '2')
151 ('symbol', '3'))
153 ('symbol', '3'))
152 >>> f(('func',
154 >>> f(('func',
153 ... ('symbol', 'p1'),
155 ... ('symbol', 'p1'),
154 ... ('or',
156 ... ('or',
155 ... ('or',
157 ... ('or',
156 ... ('func',
158 ... ('func',
157 ... ('symbol', 'sort'),
159 ... ('symbol', 'sort'),
158 ... ('list',
160 ... ('list',
159 ... ('or',
161 ... ('or',
160 ... ('or',
162 ... ('or',
161 ... ('symbol', '1'),
163 ... ('symbol', '1'),
162 ... ('symbol', '2')),
164 ... ('symbol', '2')),
163 ... ('symbol', '3')),
165 ... ('symbol', '3')),
164 ... ('negate',
166 ... ('negate',
165 ... ('symbol', 'rev')))),
167 ... ('symbol', 'rev')))),
166 ... ('and',
168 ... ('and',
167 ... ('symbol', '4'),
169 ... ('symbol', '4'),
168 ... ('group',
170 ... ('group',
169 ... ('or',
171 ... ('or',
170 ... ('or',
172 ... ('or',
171 ... ('symbol', '5'),
173 ... ('symbol', '5'),
172 ... ('symbol', '6')),
174 ... ('symbol', '6')),
173 ... ('symbol', '7'))))),
175 ... ('symbol', '7'))))),
174 ... ('symbol', '8'))))
176 ... ('symbol', '8'))))
175 (func
177 (func
176 ('symbol', 'p1')
178 ('symbol', 'p1')
177 (or
179 (or
178 (func
180 (func
179 ('symbol', 'sort')
181 ('symbol', 'sort')
180 (list
182 (list
181 (or
183 (or
182 ('symbol', '1')
184 ('symbol', '1')
183 ('symbol', '2')
185 ('symbol', '2')
184 ('symbol', '3'))
186 ('symbol', '3'))
185 (negate
187 (negate
186 ('symbol', 'rev'))))
188 ('symbol', 'rev'))))
187 (and
189 (and
188 ('symbol', '4')
190 ('symbol', '4')
189 (group
191 (group
190 (or
192 (or
191 ('symbol', '5')
193 ('symbol', '5')
192 ('symbol', '6')
194 ('symbol', '6')
193 ('symbol', '7'))))
195 ('symbol', '7'))))
194 ('symbol', '8')))
196 ('symbol', '8')))
195 """
197 """
196 if not isinstance(tree, tuple):
198 if not isinstance(tree, tuple):
197 return tree
199 return tree
198 op = tree[0]
200 op = tree[0]
199 if op not in targetnodes:
201 if op not in targetnodes:
200 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
202 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
201
203
202 # walk down left nodes taking each right node. no recursion to left nodes
204 # walk down left nodes taking each right node. no recursion to left nodes
203 # because infix operators are left-associative, i.e. left tree is deep.
205 # because infix operators are left-associative, i.e. left tree is deep.
204 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
206 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
205 simplified = []
207 simplified = []
206 x = tree
208 x = tree
207 while x[0] == op:
209 while x[0] == op:
208 l, r = x[1:]
210 l, r = x[1:]
209 simplified.append(simplifyinfixops(r, targetnodes))
211 simplified.append(simplifyinfixops(r, targetnodes))
210 x = l
212 x = l
211 simplified.append(simplifyinfixops(x, targetnodes))
213 simplified.append(simplifyinfixops(x, targetnodes))
212 simplified.append(op)
214 simplified.append(op)
213 return tuple(reversed(simplified))
215 return tuple(reversed(simplified))
General Comments 0
You need to be logged in to leave comments. Login now