Show More
@@ -1,213 +1,213 b'' | |||
|
1 | 1 | # parser.py - simple top-down operator precedence parser for mercurial |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | # see http://effbot.org/zone/simple-top-down-parsing.htm and |
|
9 | 9 | # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
|
10 | 10 | # for background |
|
11 | 11 | |
|
12 | 12 | # takes a tokenizer and elements |
|
13 | 13 | # tokenizer is an iterator that returns (type, value, pos) tuples |
|
14 | 14 | # elements is a mapping of types to binding strength, primary, prefix, infix |
|
15 | 15 | # and suffix actions |
|
16 | 16 | # an action is a tree node name, a tree label, and an optional match |
|
17 | 17 | # __call__(program) parses program into a labeled tree |
|
18 | 18 | |
|
19 | 19 | import error |
|
20 | 20 | from i18n import _ |
|
21 | 21 | |
|
22 | 22 | class parser(object): |
|
23 | 23 | def __init__(self, elements, methods=None): |
|
24 | 24 | self._elements = elements |
|
25 | 25 | self._methods = methods |
|
26 | 26 | self.current = None |
|
27 | 27 | def _advance(self): |
|
28 | 28 | 'advance the tokenizer' |
|
29 | 29 | t = self.current |
|
30 | 30 | self.current = next(self._iter, None) |
|
31 | 31 | return t |
|
32 | 32 | def _hasnewterm(self): |
|
33 | 33 | 'True if next token may start new term' |
|
34 | 34 | return any(self._elements[self.current[0]][1:3]) |
|
35 | 35 | def _match(self, m): |
|
36 | 36 | 'make sure the tokenizer matches an end condition' |
|
37 | 37 | if self.current[0] != m: |
|
38 | 38 | raise error.ParseError(_("unexpected token: %s") % self.current[0], |
|
39 | 39 | self.current[2]) |
|
40 | 40 | self._advance() |
|
41 | 41 | def _parseoperand(self, bind, m=None): |
|
42 | 42 | 'gather right-hand-side operand until an end condition or binding met' |
|
43 | 43 | if m and self.current[0] == m: |
|
44 | 44 | expr = None |
|
45 | 45 | else: |
|
46 | 46 | expr = self._parse(bind) |
|
47 | 47 | if m: |
|
48 | 48 | self._match(m) |
|
49 | 49 | return expr |
|
50 | 50 | def _parse(self, bind=0): |
|
51 | 51 | token, value, pos = self._advance() |
|
52 | 52 | # handle prefix rules on current token, take as primary if unambiguous |
|
53 | 53 | primary, prefix = self._elements[token][1:3] |
|
54 | 54 | if primary and not (prefix and self._hasnewterm()): |
|
55 | 55 | expr = (primary, value) |
|
56 | 56 | elif prefix: |
|
57 | 57 | expr = (prefix[0], self._parseoperand(*prefix[1:])) |
|
58 | 58 | else: |
|
59 | 59 | raise error.ParseError(_("not a prefix: %s") % token, pos) |
|
60 | 60 | # gather tokens until we meet a lower binding strength |
|
61 | 61 | while bind < self._elements[self.current[0]][0]: |
|
62 | 62 | token, value, pos = self._advance() |
|
63 | 63 | # handle infix rules, take as suffix if unambiguous |
|
64 | 64 | infix, suffix = self._elements[token][3:] |
|
65 | if suffix and not self._hasnewterm(): | |
|
65 | if suffix and not (infix and self._hasnewterm()): | |
|
66 | 66 | expr = (suffix[0], expr) |
|
67 | 67 | elif infix: |
|
68 | 68 | expr = (infix[0], expr, self._parseoperand(*infix[1:])) |
|
69 | 69 | else: |
|
70 | 70 | raise error.ParseError(_("not an infix: %s") % token, pos) |
|
71 | 71 | return expr |
|
72 | 72 | def parse(self, tokeniter): |
|
73 | 73 | 'generate a parse tree from tokens' |
|
74 | 74 | self._iter = tokeniter |
|
75 | 75 | self._advance() |
|
76 | 76 | res = self._parse() |
|
77 | 77 | token, value, pos = self.current |
|
78 | 78 | return res, pos |
|
79 | 79 | def eval(self, tree): |
|
80 | 80 | 'recursively evaluate a parse tree using node methods' |
|
81 | 81 | if not isinstance(tree, tuple): |
|
82 | 82 | return tree |
|
83 | 83 | return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) |
|
84 | 84 | def __call__(self, tokeniter): |
|
85 | 85 | 'parse tokens into a parse tree and evaluate if methods given' |
|
86 | 86 | t = self.parse(tokeniter) |
|
87 | 87 | if self._methods: |
|
88 | 88 | return self.eval(t) |
|
89 | 89 | return t |
|
90 | 90 | |
|
91 | 91 | def buildargsdict(trees, funcname, keys, keyvaluenode, keynode): |
|
92 | 92 | """Build dict from list containing positional and keyword arguments |
|
93 | 93 | |
|
94 | 94 | Invalid keywords or too many positional arguments are rejected, but |
|
95 | 95 | missing arguments are just omitted. |
|
96 | 96 | """ |
|
97 | 97 | if len(trees) > len(keys): |
|
98 | 98 | raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments") |
|
99 | 99 | % {'func': funcname, 'nargs': len(keys)}) |
|
100 | 100 | args = {} |
|
101 | 101 | # consume positional arguments |
|
102 | 102 | for k, x in zip(keys, trees): |
|
103 | 103 | if x[0] == keyvaluenode: |
|
104 | 104 | break |
|
105 | 105 | args[k] = x |
|
106 | 106 | # remainder should be keyword arguments |
|
107 | 107 | for x in trees[len(args):]: |
|
108 | 108 | if x[0] != keyvaluenode or x[1][0] != keynode: |
|
109 | 109 | raise error.ParseError(_("%(func)s got an invalid argument") |
|
110 | 110 | % {'func': funcname}) |
|
111 | 111 | k = x[1][1] |
|
112 | 112 | if k not in keys: |
|
113 | 113 | raise error.ParseError(_("%(func)s got an unexpected keyword " |
|
114 | 114 | "argument '%(key)s'") |
|
115 | 115 | % {'func': funcname, 'key': k}) |
|
116 | 116 | if k in args: |
|
117 | 117 | raise error.ParseError(_("%(func)s got multiple values for keyword " |
|
118 | 118 | "argument '%(key)s'") |
|
119 | 119 | % {'func': funcname, 'key': k}) |
|
120 | 120 | args[k] = x[2] |
|
121 | 121 | return args |
|
122 | 122 | |
|
123 | 123 | def _prettyformat(tree, leafnodes, level, lines): |
|
124 | 124 | if not isinstance(tree, tuple) or tree[0] in leafnodes: |
|
125 | 125 | lines.append((level, str(tree))) |
|
126 | 126 | else: |
|
127 | 127 | lines.append((level, '(%s' % tree[0])) |
|
128 | 128 | for s in tree[1:]: |
|
129 | 129 | _prettyformat(s, leafnodes, level + 1, lines) |
|
130 | 130 | lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')] |
|
131 | 131 | |
|
132 | 132 | def prettyformat(tree, leafnodes): |
|
133 | 133 | lines = [] |
|
134 | 134 | _prettyformat(tree, leafnodes, 0, lines) |
|
135 | 135 | output = '\n'.join((' ' * l + s) for l, s in lines) |
|
136 | 136 | return output |
|
137 | 137 | |
|
138 | 138 | def simplifyinfixops(tree, targetnodes): |
|
139 | 139 | """Flatten chained infix operations to reduce usage of Python stack |
|
140 | 140 | |
|
141 | 141 | >>> def f(tree): |
|
142 | 142 | ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',)) |
|
143 | 143 | >>> f(('or', |
|
144 | 144 | ... ('or', |
|
145 | 145 | ... ('symbol', '1'), |
|
146 | 146 | ... ('symbol', '2')), |
|
147 | 147 | ... ('symbol', '3'))) |
|
148 | 148 | (or |
|
149 | 149 | ('symbol', '1') |
|
150 | 150 | ('symbol', '2') |
|
151 | 151 | ('symbol', '3')) |
|
152 | 152 | >>> f(('func', |
|
153 | 153 | ... ('symbol', 'p1'), |
|
154 | 154 | ... ('or', |
|
155 | 155 | ... ('or', |
|
156 | 156 | ... ('func', |
|
157 | 157 | ... ('symbol', 'sort'), |
|
158 | 158 | ... ('list', |
|
159 | 159 | ... ('or', |
|
160 | 160 | ... ('or', |
|
161 | 161 | ... ('symbol', '1'), |
|
162 | 162 | ... ('symbol', '2')), |
|
163 | 163 | ... ('symbol', '3')), |
|
164 | 164 | ... ('negate', |
|
165 | 165 | ... ('symbol', 'rev')))), |
|
166 | 166 | ... ('and', |
|
167 | 167 | ... ('symbol', '4'), |
|
168 | 168 | ... ('group', |
|
169 | 169 | ... ('or', |
|
170 | 170 | ... ('or', |
|
171 | 171 | ... ('symbol', '5'), |
|
172 | 172 | ... ('symbol', '6')), |
|
173 | 173 | ... ('symbol', '7'))))), |
|
174 | 174 | ... ('symbol', '8')))) |
|
175 | 175 | (func |
|
176 | 176 | ('symbol', 'p1') |
|
177 | 177 | (or |
|
178 | 178 | (func |
|
179 | 179 | ('symbol', 'sort') |
|
180 | 180 | (list |
|
181 | 181 | (or |
|
182 | 182 | ('symbol', '1') |
|
183 | 183 | ('symbol', '2') |
|
184 | 184 | ('symbol', '3')) |
|
185 | 185 | (negate |
|
186 | 186 | ('symbol', 'rev')))) |
|
187 | 187 | (and |
|
188 | 188 | ('symbol', '4') |
|
189 | 189 | (group |
|
190 | 190 | (or |
|
191 | 191 | ('symbol', '5') |
|
192 | 192 | ('symbol', '6') |
|
193 | 193 | ('symbol', '7')))) |
|
194 | 194 | ('symbol', '8'))) |
|
195 | 195 | """ |
|
196 | 196 | if not isinstance(tree, tuple): |
|
197 | 197 | return tree |
|
198 | 198 | op = tree[0] |
|
199 | 199 | if op not in targetnodes: |
|
200 | 200 | return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:]) |
|
201 | 201 | |
|
202 | 202 | # walk down left nodes taking each right node. no recursion to left nodes |
|
203 | 203 | # because infix operators are left-associative, i.e. left tree is deep. |
|
204 | 204 | # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3) |
|
205 | 205 | simplified = [] |
|
206 | 206 | x = tree |
|
207 | 207 | while x[0] == op: |
|
208 | 208 | l, r = x[1:] |
|
209 | 209 | simplified.append(simplifyinfixops(r, targetnodes)) |
|
210 | 210 | x = l |
|
211 | 211 | simplified.append(simplifyinfixops(x, targetnodes)) |
|
212 | 212 | simplified.append(op) |
|
213 | 213 | return tuple(reversed(simplified)) |
General Comments 0
You need to be logged in to leave comments.
Login now