Show More
@@ -1,213 +1,213 b'' | |||||
1 | # parser.py - simple top-down operator precedence parser for mercurial |
|
1 | # parser.py - simple top-down operator precedence parser for mercurial | |
2 | # |
|
2 | # | |
3 | # Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
3 | # Copyright 2010 Matt Mackall <mpm@selenic.com> | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | # see http://effbot.org/zone/simple-top-down-parsing.htm and |
|
8 | # see http://effbot.org/zone/simple-top-down-parsing.htm and | |
9 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
|
9 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ | |
10 | # for background |
|
10 | # for background | |
11 |
|
11 | |||
12 | # takes a tokenizer and elements |
|
12 | # takes a tokenizer and elements | |
13 | # tokenizer is an iterator that returns (type, value, pos) tuples |
|
13 | # tokenizer is an iterator that returns (type, value, pos) tuples | |
14 | # elements is a mapping of types to binding strength, primary, prefix, infix |
|
14 | # elements is a mapping of types to binding strength, primary, prefix, infix | |
15 | # and suffix actions |
|
15 | # and suffix actions | |
16 | # an action is a tree node name, a tree label, and an optional match |
|
16 | # an action is a tree node name, a tree label, and an optional match | |
17 | # __call__(program) parses program into a labeled tree |
|
17 | # __call__(program) parses program into a labeled tree | |
18 |
|
18 | |||
19 | import error |
|
19 | import error | |
20 | from i18n import _ |
|
20 | from i18n import _ | |
21 |
|
21 | |||
22 | class parser(object): |
|
22 | class parser(object): | |
23 | def __init__(self, elements, methods=None): |
|
23 | def __init__(self, elements, methods=None): | |
24 | self._elements = elements |
|
24 | self._elements = elements | |
25 | self._methods = methods |
|
25 | self._methods = methods | |
26 | self.current = None |
|
26 | self.current = None | |
27 | def _advance(self): |
|
27 | def _advance(self): | |
28 | 'advance the tokenizer' |
|
28 | 'advance the tokenizer' | |
29 | t = self.current |
|
29 | t = self.current | |
30 | self.current = next(self._iter, None) |
|
30 | self.current = next(self._iter, None) | |
31 | return t |
|
31 | return t | |
32 | def _hasnewterm(self): |
|
32 | def _hasnewterm(self): | |
33 | 'True if next token may start new term' |
|
33 | 'True if next token may start new term' | |
34 | return any(self._elements[self.current[0]][1:3]) |
|
34 | return any(self._elements[self.current[0]][1:3]) | |
35 | def _match(self, m): |
|
35 | def _match(self, m): | |
36 | 'make sure the tokenizer matches an end condition' |
|
36 | 'make sure the tokenizer matches an end condition' | |
37 | if self.current[0] != m: |
|
37 | if self.current[0] != m: | |
38 | raise error.ParseError(_("unexpected token: %s") % self.current[0], |
|
38 | raise error.ParseError(_("unexpected token: %s") % self.current[0], | |
39 | self.current[2]) |
|
39 | self.current[2]) | |
40 | self._advance() |
|
40 | self._advance() | |
41 | def _parseoperand(self, bind, m=None): |
|
41 | def _parseoperand(self, bind, m=None): | |
42 | 'gather right-hand-side operand until an end condition or binding met' |
|
42 | 'gather right-hand-side operand until an end condition or binding met' | |
43 | if m and self.current[0] == m: |
|
43 | if m and self.current[0] == m: | |
44 | expr = None |
|
44 | expr = None | |
45 | else: |
|
45 | else: | |
46 | expr = self._parse(bind) |
|
46 | expr = self._parse(bind) | |
47 | if m: |
|
47 | if m: | |
48 | self._match(m) |
|
48 | self._match(m) | |
49 | return expr |
|
49 | return expr | |
50 | def _parse(self, bind=0): |
|
50 | def _parse(self, bind=0): | |
51 | token, value, pos = self._advance() |
|
51 | token, value, pos = self._advance() | |
52 | # handle prefix rules on current token, take as primary if unambiguous |
|
52 | # handle prefix rules on current token, take as primary if unambiguous | |
53 | primary, prefix = self._elements[token][1:3] |
|
53 | primary, prefix = self._elements[token][1:3] | |
54 | if primary and not (prefix and self._hasnewterm()): |
|
54 | if primary and not (prefix and self._hasnewterm()): | |
55 | expr = (primary, value) |
|
55 | expr = (primary, value) | |
56 | elif prefix: |
|
56 | elif prefix: | |
57 | expr = (prefix[0], self._parseoperand(*prefix[1:])) |
|
57 | expr = (prefix[0], self._parseoperand(*prefix[1:])) | |
58 | else: |
|
58 | else: | |
59 | raise error.ParseError(_("not a prefix: %s") % token, pos) |
|
59 | raise error.ParseError(_("not a prefix: %s") % token, pos) | |
60 | # gather tokens until we meet a lower binding strength |
|
60 | # gather tokens until we meet a lower binding strength | |
61 | while bind < self._elements[self.current[0]][0]: |
|
61 | while bind < self._elements[self.current[0]][0]: | |
62 | token, value, pos = self._advance() |
|
62 | token, value, pos = self._advance() | |
63 | # handle infix rules, take as suffix if unambiguous |
|
63 | # handle infix rules, take as suffix if unambiguous | |
64 | infix, suffix = self._elements[token][3:] |
|
64 | infix, suffix = self._elements[token][3:] | |
65 | if suffix and not self._hasnewterm(): |
|
65 | if suffix and not (infix and self._hasnewterm()): | |
66 | expr = (suffix[0], expr) |
|
66 | expr = (suffix[0], expr) | |
67 | elif infix: |
|
67 | elif infix: | |
68 | expr = (infix[0], expr, self._parseoperand(*infix[1:])) |
|
68 | expr = (infix[0], expr, self._parseoperand(*infix[1:])) | |
69 | else: |
|
69 | else: | |
70 | raise error.ParseError(_("not an infix: %s") % token, pos) |
|
70 | raise error.ParseError(_("not an infix: %s") % token, pos) | |
71 | return expr |
|
71 | return expr | |
72 | def parse(self, tokeniter): |
|
72 | def parse(self, tokeniter): | |
73 | 'generate a parse tree from tokens' |
|
73 | 'generate a parse tree from tokens' | |
74 | self._iter = tokeniter |
|
74 | self._iter = tokeniter | |
75 | self._advance() |
|
75 | self._advance() | |
76 | res = self._parse() |
|
76 | res = self._parse() | |
77 | token, value, pos = self.current |
|
77 | token, value, pos = self.current | |
78 | return res, pos |
|
78 | return res, pos | |
79 | def eval(self, tree): |
|
79 | def eval(self, tree): | |
80 | 'recursively evaluate a parse tree using node methods' |
|
80 | 'recursively evaluate a parse tree using node methods' | |
81 | if not isinstance(tree, tuple): |
|
81 | if not isinstance(tree, tuple): | |
82 | return tree |
|
82 | return tree | |
83 | return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) |
|
83 | return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) | |
84 | def __call__(self, tokeniter): |
|
84 | def __call__(self, tokeniter): | |
85 | 'parse tokens into a parse tree and evaluate if methods given' |
|
85 | 'parse tokens into a parse tree and evaluate if methods given' | |
86 | t = self.parse(tokeniter) |
|
86 | t = self.parse(tokeniter) | |
87 | if self._methods: |
|
87 | if self._methods: | |
88 | return self.eval(t) |
|
88 | return self.eval(t) | |
89 | return t |
|
89 | return t | |
90 |
|
90 | |||
91 | def buildargsdict(trees, funcname, keys, keyvaluenode, keynode): |
|
91 | def buildargsdict(trees, funcname, keys, keyvaluenode, keynode): | |
92 | """Build dict from list containing positional and keyword arguments |
|
92 | """Build dict from list containing positional and keyword arguments | |
93 |
|
93 | |||
94 | Invalid keywords or too many positional arguments are rejected, but |
|
94 | Invalid keywords or too many positional arguments are rejected, but | |
95 | missing arguments are just omitted. |
|
95 | missing arguments are just omitted. | |
96 | """ |
|
96 | """ | |
97 | if len(trees) > len(keys): |
|
97 | if len(trees) > len(keys): | |
98 | raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments") |
|
98 | raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments") | |
99 | % {'func': funcname, 'nargs': len(keys)}) |
|
99 | % {'func': funcname, 'nargs': len(keys)}) | |
100 | args = {} |
|
100 | args = {} | |
101 | # consume positional arguments |
|
101 | # consume positional arguments | |
102 | for k, x in zip(keys, trees): |
|
102 | for k, x in zip(keys, trees): | |
103 | if x[0] == keyvaluenode: |
|
103 | if x[0] == keyvaluenode: | |
104 | break |
|
104 | break | |
105 | args[k] = x |
|
105 | args[k] = x | |
106 | # remainder should be keyword arguments |
|
106 | # remainder should be keyword arguments | |
107 | for x in trees[len(args):]: |
|
107 | for x in trees[len(args):]: | |
108 | if x[0] != keyvaluenode or x[1][0] != keynode: |
|
108 | if x[0] != keyvaluenode or x[1][0] != keynode: | |
109 | raise error.ParseError(_("%(func)s got an invalid argument") |
|
109 | raise error.ParseError(_("%(func)s got an invalid argument") | |
110 | % {'func': funcname}) |
|
110 | % {'func': funcname}) | |
111 | k = x[1][1] |
|
111 | k = x[1][1] | |
112 | if k not in keys: |
|
112 | if k not in keys: | |
113 | raise error.ParseError(_("%(func)s got an unexpected keyword " |
|
113 | raise error.ParseError(_("%(func)s got an unexpected keyword " | |
114 | "argument '%(key)s'") |
|
114 | "argument '%(key)s'") | |
115 | % {'func': funcname, 'key': k}) |
|
115 | % {'func': funcname, 'key': k}) | |
116 | if k in args: |
|
116 | if k in args: | |
117 | raise error.ParseError(_("%(func)s got multiple values for keyword " |
|
117 | raise error.ParseError(_("%(func)s got multiple values for keyword " | |
118 | "argument '%(key)s'") |
|
118 | "argument '%(key)s'") | |
119 | % {'func': funcname, 'key': k}) |
|
119 | % {'func': funcname, 'key': k}) | |
120 | args[k] = x[2] |
|
120 | args[k] = x[2] | |
121 | return args |
|
121 | return args | |
122 |
|
122 | |||
123 | def _prettyformat(tree, leafnodes, level, lines): |
|
123 | def _prettyformat(tree, leafnodes, level, lines): | |
124 | if not isinstance(tree, tuple) or tree[0] in leafnodes: |
|
124 | if not isinstance(tree, tuple) or tree[0] in leafnodes: | |
125 | lines.append((level, str(tree))) |
|
125 | lines.append((level, str(tree))) | |
126 | else: |
|
126 | else: | |
127 | lines.append((level, '(%s' % tree[0])) |
|
127 | lines.append((level, '(%s' % tree[0])) | |
128 | for s in tree[1:]: |
|
128 | for s in tree[1:]: | |
129 | _prettyformat(s, leafnodes, level + 1, lines) |
|
129 | _prettyformat(s, leafnodes, level + 1, lines) | |
130 | lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] |
|
130 | lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] | |
131 |
|
131 | |||
132 | def prettyformat(tree, leafnodes): |
|
132 | def prettyformat(tree, leafnodes): | |
133 | lines = [] |
|
133 | lines = [] | |
134 | _prettyformat(tree, leafnodes, 0, lines) |
|
134 | _prettyformat(tree, leafnodes, 0, lines) | |
135 | output = '\n'.join((' ' * l + s) for l, s in lines) |
|
135 | output = '\n'.join((' ' * l + s) for l, s in lines) | |
136 | return output |
|
136 | return output | |
137 |
|
137 | |||
138 | def simplifyinfixops(tree, targetnodes): |
|
138 | def simplifyinfixops(tree, targetnodes): | |
139 | """Flatten chained infix operations to reduce usage of Python stack |
|
139 | """Flatten chained infix operations to reduce usage of Python stack | |
140 |
|
140 | |||
141 | >>> def f(tree): |
|
141 | >>> def f(tree): | |
142 | ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',)) |
|
142 | ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',)) | |
143 | >>> f(('or', |
|
143 | >>> f(('or', | |
144 | ... ('or', |
|
144 | ... ('or', | |
145 | ... ('symbol', '1'), |
|
145 | ... ('symbol', '1'), | |
146 | ... ('symbol', '2')), |
|
146 | ... ('symbol', '2')), | |
147 | ... ('symbol', '3'))) |
|
147 | ... ('symbol', '3'))) | |
148 | (or |
|
148 | (or | |
149 | ('symbol', '1') |
|
149 | ('symbol', '1') | |
150 | ('symbol', '2') |
|
150 | ('symbol', '2') | |
151 | ('symbol', '3')) |
|
151 | ('symbol', '3')) | |
152 | >>> f(('func', |
|
152 | >>> f(('func', | |
153 | ... ('symbol', 'p1'), |
|
153 | ... ('symbol', 'p1'), | |
154 | ... ('or', |
|
154 | ... ('or', | |
155 | ... ('or', |
|
155 | ... ('or', | |
156 | ... ('func', |
|
156 | ... ('func', | |
157 | ... ('symbol', 'sort'), |
|
157 | ... ('symbol', 'sort'), | |
158 | ... ('list', |
|
158 | ... ('list', | |
159 | ... ('or', |
|
159 | ... ('or', | |
160 | ... ('or', |
|
160 | ... ('or', | |
161 | ... ('symbol', '1'), |
|
161 | ... ('symbol', '1'), | |
162 | ... ('symbol', '2')), |
|
162 | ... ('symbol', '2')), | |
163 | ... ('symbol', '3')), |
|
163 | ... ('symbol', '3')), | |
164 | ... ('negate', |
|
164 | ... ('negate', | |
165 | ... ('symbol', 'rev')))), |
|
165 | ... ('symbol', 'rev')))), | |
166 | ... ('and', |
|
166 | ... ('and', | |
167 | ... ('symbol', '4'), |
|
167 | ... ('symbol', '4'), | |
168 | ... ('group', |
|
168 | ... ('group', | |
169 | ... ('or', |
|
169 | ... ('or', | |
170 | ... ('or', |
|
170 | ... ('or', | |
171 | ... ('symbol', '5'), |
|
171 | ... ('symbol', '5'), | |
172 | ... ('symbol', '6')), |
|
172 | ... ('symbol', '6')), | |
173 | ... ('symbol', '7'))))), |
|
173 | ... ('symbol', '7'))))), | |
174 | ... ('symbol', '8')))) |
|
174 | ... ('symbol', '8')))) | |
175 | (func |
|
175 | (func | |
176 | ('symbol', 'p1') |
|
176 | ('symbol', 'p1') | |
177 | (or |
|
177 | (or | |
178 | (func |
|
178 | (func | |
179 | ('symbol', 'sort') |
|
179 | ('symbol', 'sort') | |
180 | (list |
|
180 | (list | |
181 | (or |
|
181 | (or | |
182 | ('symbol', '1') |
|
182 | ('symbol', '1') | |
183 | ('symbol', '2') |
|
183 | ('symbol', '2') | |
184 | ('symbol', '3')) |
|
184 | ('symbol', '3')) | |
185 | (negate |
|
185 | (negate | |
186 | ('symbol', 'rev')))) |
|
186 | ('symbol', 'rev')))) | |
187 | (and |
|
187 | (and | |
188 | ('symbol', '4') |
|
188 | ('symbol', '4') | |
189 | (group |
|
189 | (group | |
190 | (or |
|
190 | (or | |
191 | ('symbol', '5') |
|
191 | ('symbol', '5') | |
192 | ('symbol', '6') |
|
192 | ('symbol', '6') | |
193 | ('symbol', '7')))) |
|
193 | ('symbol', '7')))) | |
194 | ('symbol', '8'))) |
|
194 | ('symbol', '8'))) | |
195 | """ |
|
195 | """ | |
196 | if not isinstance(tree, tuple): |
|
196 | if not isinstance(tree, tuple): | |
197 | return tree |
|
197 | return tree | |
198 | op = tree[0] |
|
198 | op = tree[0] | |
199 | if op not in targetnodes: |
|
199 | if op not in targetnodes: | |
200 | return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:]) |
|
200 | return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:]) | |
201 |
|
201 | |||
202 | # walk down left nodes taking each right node. no recursion to left nodes |
|
202 | # walk down left nodes taking each right node. no recursion to left nodes | |
203 | # because infix operators are left-associative, i.e. left tree is deep. |
|
203 | # because infix operators are left-associative, i.e. left tree is deep. | |
204 | # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3) |
|
204 | # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3) | |
205 | simplified = [] |
|
205 | simplified = [] | |
206 | x = tree |
|
206 | x = tree | |
207 | while x[0] == op: |
|
207 | while x[0] == op: | |
208 | l, r = x[1:] |
|
208 | l, r = x[1:] | |
209 | simplified.append(simplifyinfixops(r, targetnodes)) |
|
209 | simplified.append(simplifyinfixops(r, targetnodes)) | |
210 | x = l |
|
210 | x = l | |
211 | simplified.append(simplifyinfixops(x, targetnodes)) |
|
211 | simplified.append(simplifyinfixops(x, targetnodes)) | |
212 | simplified.append(op) |
|
212 | simplified.append(op) | |
213 | return tuple(reversed(simplified)) |
|
213 | return tuple(reversed(simplified)) |
General Comments 0
You need to be logged in to leave comments.
Login now