Show More
@@ -1,211 +1,214 b'' | |||||
1 | # parser.py - simple top-down operator precedence parser for mercurial |
|
1 | # parser.py - simple top-down operator precedence parser for mercurial | |
2 | # |
|
2 | # | |
3 | # Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
3 | # Copyright 2010 Matt Mackall <mpm@selenic.com> | |
4 | # |
|
4 | # | |
5 | # This software may be used and distributed according to the terms of the |
|
5 | # This software may be used and distributed according to the terms of the | |
6 | # GNU General Public License version 2 or any later version. |
|
6 | # GNU General Public License version 2 or any later version. | |
7 |
|
7 | |||
8 | # see http://effbot.org/zone/simple-top-down-parsing.htm and |
|
8 | # see http://effbot.org/zone/simple-top-down-parsing.htm and | |
9 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
|
9 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ | |
10 | # for background |
|
10 | # for background | |
11 |
|
11 | |||
12 | # takes a tokenizer and elements |
|
12 | # takes a tokenizer and elements | |
13 | # tokenizer is an iterator that returns (type, value, pos) tuples |
|
13 | # tokenizer is an iterator that returns (type, value, pos) tuples | |
14 | # elements is a mapping of types to binding strength, prefix, infix and |
|
14 | # elements is a mapping of types to binding strength, prefix, infix and | |
15 | # suffix actions |
|
15 | # suffix actions | |
16 | # an action is a tree node name, a tree label, and an optional match |
|
16 | # an action is a tree node name, a tree label, and an optional match | |
17 | # __call__(program) parses program into a labeled tree |
|
17 | # __call__(program) parses program into a labeled tree | |
18 |
|
18 | |||
19 | import error |
|
19 | import error | |
20 | from i18n import _ |
|
20 | from i18n import _ | |
21 |
|
21 | |||
22 | class parser(object): |
|
22 | class parser(object): | |
23 | def __init__(self, elements, methods=None): |
|
23 | def __init__(self, elements, methods=None): | |
24 | self._elements = elements |
|
24 | self._elements = elements | |
25 | self._methods = methods |
|
25 | self._methods = methods | |
26 | self.current = None |
|
26 | self.current = None | |
27 | def _advance(self): |
|
27 | def _advance(self): | |
28 | 'advance the tokenizer' |
|
28 | 'advance the tokenizer' | |
29 | t = self.current |
|
29 | t = self.current | |
30 | self.current = next(self._iter, None) |
|
30 | self.current = next(self._iter, None) | |
31 | return t |
|
31 | return t | |
|
32 | def _hasnewterm(self): | |||
|
33 | 'True if next token may start new term' | |||
|
34 | return bool(self._elements[self.current[0]][1]) | |||
32 | def _match(self, m): |
|
35 | def _match(self, m): | |
33 | 'make sure the tokenizer matches an end condition' |
|
36 | 'make sure the tokenizer matches an end condition' | |
34 | if self.current[0] != m: |
|
37 | if self.current[0] != m: | |
35 | raise error.ParseError(_("unexpected token: %s") % self.current[0], |
|
38 | raise error.ParseError(_("unexpected token: %s") % self.current[0], | |
36 | self.current[2]) |
|
39 | self.current[2]) | |
37 | self._advance() |
|
40 | self._advance() | |
38 | def _parseoperand(self, bind, m=None): |
|
41 | def _parseoperand(self, bind, m=None): | |
39 | 'gather right-hand-side operand until an end condition or binding met' |
|
42 | 'gather right-hand-side operand until an end condition or binding met' | |
40 | if m and self.current[0] == m: |
|
43 | if m and self.current[0] == m: | |
41 | expr = None |
|
44 | expr = None | |
42 | else: |
|
45 | else: | |
43 | expr = self._parse(bind) |
|
46 | expr = self._parse(bind) | |
44 | if m: |
|
47 | if m: | |
45 | self._match(m) |
|
48 | self._match(m) | |
46 | return expr |
|
49 | return expr | |
47 | def _parse(self, bind=0): |
|
50 | def _parse(self, bind=0): | |
48 | token, value, pos = self._advance() |
|
51 | token, value, pos = self._advance() | |
49 | # handle prefix rules on current token |
|
52 | # handle prefix rules on current token | |
50 | prefix = self._elements[token][1] |
|
53 | prefix = self._elements[token][1] | |
51 | if not prefix: |
|
54 | if not prefix: | |
52 | raise error.ParseError(_("not a prefix: %s") % token, pos) |
|
55 | raise error.ParseError(_("not a prefix: %s") % token, pos) | |
53 | if len(prefix) == 1: |
|
56 | if len(prefix) == 1: | |
54 | expr = (prefix[0], value) |
|
57 | expr = (prefix[0], value) | |
55 | else: |
|
58 | else: | |
56 | expr = (prefix[0], self._parseoperand(*prefix[1:])) |
|
59 | expr = (prefix[0], self._parseoperand(*prefix[1:])) | |
57 | # gather tokens until we meet a lower binding strength |
|
60 | # gather tokens until we meet a lower binding strength | |
58 | while bind < self._elements[self.current[0]][0]: |
|
61 | while bind < self._elements[self.current[0]][0]: | |
59 | token, value, pos = self._advance() |
|
62 | token, value, pos = self._advance() | |
60 | infix, suffix = self._elements[token][2:] |
|
63 | infix, suffix = self._elements[token][2:] | |
61 | # check for suffix - next token isn't a valid prefix |
|
64 | # check for suffix - next token isn't a valid prefix | |
62 |
if suffix and not self._ |
|
65 | if suffix and not self._hasnewterm(): | |
63 | expr = (suffix[0], expr) |
|
66 | expr = (suffix[0], expr) | |
64 | else: |
|
67 | else: | |
65 | # handle infix rules |
|
68 | # handle infix rules | |
66 | if not infix: |
|
69 | if not infix: | |
67 | raise error.ParseError(_("not an infix: %s") % token, pos) |
|
70 | raise error.ParseError(_("not an infix: %s") % token, pos) | |
68 | expr = (infix[0], expr, self._parseoperand(*infix[1:])) |
|
71 | expr = (infix[0], expr, self._parseoperand(*infix[1:])) | |
69 | return expr |
|
72 | return expr | |
70 | def parse(self, tokeniter): |
|
73 | def parse(self, tokeniter): | |
71 | 'generate a parse tree from tokens' |
|
74 | 'generate a parse tree from tokens' | |
72 | self._iter = tokeniter |
|
75 | self._iter = tokeniter | |
73 | self._advance() |
|
76 | self._advance() | |
74 | res = self._parse() |
|
77 | res = self._parse() | |
75 | token, value, pos = self.current |
|
78 | token, value, pos = self.current | |
76 | return res, pos |
|
79 | return res, pos | |
77 | def eval(self, tree): |
|
80 | def eval(self, tree): | |
78 | 'recursively evaluate a parse tree using node methods' |
|
81 | 'recursively evaluate a parse tree using node methods' | |
79 | if not isinstance(tree, tuple): |
|
82 | if not isinstance(tree, tuple): | |
80 | return tree |
|
83 | return tree | |
81 | return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) |
|
84 | return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) | |
82 | def __call__(self, tokeniter): |
|
85 | def __call__(self, tokeniter): | |
83 | 'parse tokens into a parse tree and evaluate if methods given' |
|
86 | 'parse tokens into a parse tree and evaluate if methods given' | |
84 | t = self.parse(tokeniter) |
|
87 | t = self.parse(tokeniter) | |
85 | if self._methods: |
|
88 | if self._methods: | |
86 | return self.eval(t) |
|
89 | return self.eval(t) | |
87 | return t |
|
90 | return t | |
88 |
|
91 | |||
89 | def buildargsdict(trees, funcname, keys, keyvaluenode, keynode): |
|
92 | def buildargsdict(trees, funcname, keys, keyvaluenode, keynode): | |
90 | """Build dict from list containing positional and keyword arguments |
|
93 | """Build dict from list containing positional and keyword arguments | |
91 |
|
94 | |||
92 | Invalid keywords or too many positional arguments are rejected, but |
|
95 | Invalid keywords or too many positional arguments are rejected, but | |
93 | missing arguments are just omitted. |
|
96 | missing arguments are just omitted. | |
94 | """ |
|
97 | """ | |
95 | if len(trees) > len(keys): |
|
98 | if len(trees) > len(keys): | |
96 | raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments") |
|
99 | raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments") | |
97 | % {'func': funcname, 'nargs': len(keys)}) |
|
100 | % {'func': funcname, 'nargs': len(keys)}) | |
98 | args = {} |
|
101 | args = {} | |
99 | # consume positional arguments |
|
102 | # consume positional arguments | |
100 | for k, x in zip(keys, trees): |
|
103 | for k, x in zip(keys, trees): | |
101 | if x[0] == keyvaluenode: |
|
104 | if x[0] == keyvaluenode: | |
102 | break |
|
105 | break | |
103 | args[k] = x |
|
106 | args[k] = x | |
104 | # remainder should be keyword arguments |
|
107 | # remainder should be keyword arguments | |
105 | for x in trees[len(args):]: |
|
108 | for x in trees[len(args):]: | |
106 | if x[0] != keyvaluenode or x[1][0] != keynode: |
|
109 | if x[0] != keyvaluenode or x[1][0] != keynode: | |
107 | raise error.ParseError(_("%(func)s got an invalid argument") |
|
110 | raise error.ParseError(_("%(func)s got an invalid argument") | |
108 | % {'func': funcname}) |
|
111 | % {'func': funcname}) | |
109 | k = x[1][1] |
|
112 | k = x[1][1] | |
110 | if k not in keys: |
|
113 | if k not in keys: | |
111 | raise error.ParseError(_("%(func)s got an unexpected keyword " |
|
114 | raise error.ParseError(_("%(func)s got an unexpected keyword " | |
112 | "argument '%(key)s'") |
|
115 | "argument '%(key)s'") | |
113 | % {'func': funcname, 'key': k}) |
|
116 | % {'func': funcname, 'key': k}) | |
114 | if k in args: |
|
117 | if k in args: | |
115 | raise error.ParseError(_("%(func)s got multiple values for keyword " |
|
118 | raise error.ParseError(_("%(func)s got multiple values for keyword " | |
116 | "argument '%(key)s'") |
|
119 | "argument '%(key)s'") | |
117 | % {'func': funcname, 'key': k}) |
|
120 | % {'func': funcname, 'key': k}) | |
118 | args[k] = x[2] |
|
121 | args[k] = x[2] | |
119 | return args |
|
122 | return args | |
120 |
|
123 | |||
121 | def _prettyformat(tree, leafnodes, level, lines): |
|
124 | def _prettyformat(tree, leafnodes, level, lines): | |
122 | if not isinstance(tree, tuple) or tree[0] in leafnodes: |
|
125 | if not isinstance(tree, tuple) or tree[0] in leafnodes: | |
123 | lines.append((level, str(tree))) |
|
126 | lines.append((level, str(tree))) | |
124 | else: |
|
127 | else: | |
125 | lines.append((level, '(%s' % tree[0])) |
|
128 | lines.append((level, '(%s' % tree[0])) | |
126 | for s in tree[1:]: |
|
129 | for s in tree[1:]: | |
127 | _prettyformat(s, leafnodes, level + 1, lines) |
|
130 | _prettyformat(s, leafnodes, level + 1, lines) | |
128 | lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] |
|
131 | lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] | |
129 |
|
132 | |||
130 | def prettyformat(tree, leafnodes): |
|
133 | def prettyformat(tree, leafnodes): | |
131 | lines = [] |
|
134 | lines = [] | |
132 | _prettyformat(tree, leafnodes, 0, lines) |
|
135 | _prettyformat(tree, leafnodes, 0, lines) | |
133 | output = '\n'.join((' ' * l + s) for l, s in lines) |
|
136 | output = '\n'.join((' ' * l + s) for l, s in lines) | |
134 | return output |
|
137 | return output | |
135 |
|
138 | |||
136 | def simplifyinfixops(tree, targetnodes): |
|
139 | def simplifyinfixops(tree, targetnodes): | |
137 | """Flatten chained infix operations to reduce usage of Python stack |
|
140 | """Flatten chained infix operations to reduce usage of Python stack | |
138 |
|
141 | |||
139 | >>> def f(tree): |
|
142 | >>> def f(tree): | |
140 | ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',)) |
|
143 | ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',)) | |
141 | >>> f(('or', |
|
144 | >>> f(('or', | |
142 | ... ('or', |
|
145 | ... ('or', | |
143 | ... ('symbol', '1'), |
|
146 | ... ('symbol', '1'), | |
144 | ... ('symbol', '2')), |
|
147 | ... ('symbol', '2')), | |
145 | ... ('symbol', '3'))) |
|
148 | ... ('symbol', '3'))) | |
146 | (or |
|
149 | (or | |
147 | ('symbol', '1') |
|
150 | ('symbol', '1') | |
148 | ('symbol', '2') |
|
151 | ('symbol', '2') | |
149 | ('symbol', '3')) |
|
152 | ('symbol', '3')) | |
150 | >>> f(('func', |
|
153 | >>> f(('func', | |
151 | ... ('symbol', 'p1'), |
|
154 | ... ('symbol', 'p1'), | |
152 | ... ('or', |
|
155 | ... ('or', | |
153 | ... ('or', |
|
156 | ... ('or', | |
154 | ... ('func', |
|
157 | ... ('func', | |
155 | ... ('symbol', 'sort'), |
|
158 | ... ('symbol', 'sort'), | |
156 | ... ('list', |
|
159 | ... ('list', | |
157 | ... ('or', |
|
160 | ... ('or', | |
158 | ... ('or', |
|
161 | ... ('or', | |
159 | ... ('symbol', '1'), |
|
162 | ... ('symbol', '1'), | |
160 | ... ('symbol', '2')), |
|
163 | ... ('symbol', '2')), | |
161 | ... ('symbol', '3')), |
|
164 | ... ('symbol', '3')), | |
162 | ... ('negate', |
|
165 | ... ('negate', | |
163 | ... ('symbol', 'rev')))), |
|
166 | ... ('symbol', 'rev')))), | |
164 | ... ('and', |
|
167 | ... ('and', | |
165 | ... ('symbol', '4'), |
|
168 | ... ('symbol', '4'), | |
166 | ... ('group', |
|
169 | ... ('group', | |
167 | ... ('or', |
|
170 | ... ('or', | |
168 | ... ('or', |
|
171 | ... ('or', | |
169 | ... ('symbol', '5'), |
|
172 | ... ('symbol', '5'), | |
170 | ... ('symbol', '6')), |
|
173 | ... ('symbol', '6')), | |
171 | ... ('symbol', '7'))))), |
|
174 | ... ('symbol', '7'))))), | |
172 | ... ('symbol', '8')))) |
|
175 | ... ('symbol', '8')))) | |
173 | (func |
|
176 | (func | |
174 | ('symbol', 'p1') |
|
177 | ('symbol', 'p1') | |
175 | (or |
|
178 | (or | |
176 | (func |
|
179 | (func | |
177 | ('symbol', 'sort') |
|
180 | ('symbol', 'sort') | |
178 | (list |
|
181 | (list | |
179 | (or |
|
182 | (or | |
180 | ('symbol', '1') |
|
183 | ('symbol', '1') | |
181 | ('symbol', '2') |
|
184 | ('symbol', '2') | |
182 | ('symbol', '3')) |
|
185 | ('symbol', '3')) | |
183 | (negate |
|
186 | (negate | |
184 | ('symbol', 'rev')))) |
|
187 | ('symbol', 'rev')))) | |
185 | (and |
|
188 | (and | |
186 | ('symbol', '4') |
|
189 | ('symbol', '4') | |
187 | (group |
|
190 | (group | |
188 | (or |
|
191 | (or | |
189 | ('symbol', '5') |
|
192 | ('symbol', '5') | |
190 | ('symbol', '6') |
|
193 | ('symbol', '6') | |
191 | ('symbol', '7')))) |
|
194 | ('symbol', '7')))) | |
192 | ('symbol', '8'))) |
|
195 | ('symbol', '8'))) | |
193 | """ |
|
196 | """ | |
194 | if not isinstance(tree, tuple): |
|
197 | if not isinstance(tree, tuple): | |
195 | return tree |
|
198 | return tree | |
196 | op = tree[0] |
|
199 | op = tree[0] | |
197 | if op not in targetnodes: |
|
200 | if op not in targetnodes: | |
198 | return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:]) |
|
201 | return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:]) | |
199 |
|
202 | |||
200 | # walk down left nodes taking each right node. no recursion to left nodes |
|
203 | # walk down left nodes taking each right node. no recursion to left nodes | |
201 | # because infix operators are left-associative, i.e. left tree is deep. |
|
204 | # because infix operators are left-associative, i.e. left tree is deep. | |
202 | # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3) |
|
205 | # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3) | |
203 | simplified = [] |
|
206 | simplified = [] | |
204 | x = tree |
|
207 | x = tree | |
205 | while x[0] == op: |
|
208 | while x[0] == op: | |
206 | l, r = x[1:] |
|
209 | l, r = x[1:] | |
207 | simplified.append(simplifyinfixops(r, targetnodes)) |
|
210 | simplified.append(simplifyinfixops(r, targetnodes)) | |
208 | x = l |
|
211 | x = l | |
209 | simplified.append(simplifyinfixops(x, targetnodes)) |
|
212 | simplified.append(simplifyinfixops(x, targetnodes)) | |
210 | simplified.append(op) |
|
213 | simplified.append(op) | |
211 | return tuple(reversed(simplified)) |
|
214 | return tuple(reversed(simplified)) |
General Comments 0
You need to be logged in to leave comments.
Login now