##// END OF EJS Templates
parser: make buildargsdict() precompute position where keyword args start...
Yuya Nishihara -
r30752:ffd324ea default
parent child Browse files
Show More
@@ -1,540 +1,541 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 15 # and suffix actions
16 16 # an action is a tree node name, a tree label, and an optional match
17 17 # __call__(program) parses program into a labeled tree
18 18
19 19 from __future__ import absolute_import
20 20
21 21 from .i18n import _
22 22 from . import error
23 23
24 24 class parser(object):
25 25 def __init__(self, elements, methods=None):
26 26 self._elements = elements
27 27 self._methods = methods
28 28 self.current = None
29 29 def _advance(self):
30 30 'advance the tokenizer'
31 31 t = self.current
32 32 self.current = next(self._iter, None)
33 33 return t
34 34 def _hasnewterm(self):
35 35 'True if next token may start new term'
36 36 return any(self._elements[self.current[0]][1:3])
37 37 def _match(self, m):
38 38 'make sure the tokenizer matches an end condition'
39 39 if self.current[0] != m:
40 40 raise error.ParseError(_("unexpected token: %s") % self.current[0],
41 41 self.current[2])
42 42 self._advance()
43 43 def _parseoperand(self, bind, m=None):
44 44 'gather right-hand-side operand until an end condition or binding met'
45 45 if m and self.current[0] == m:
46 46 expr = None
47 47 else:
48 48 expr = self._parse(bind)
49 49 if m:
50 50 self._match(m)
51 51 return expr
52 52 def _parse(self, bind=0):
53 53 token, value, pos = self._advance()
54 54 # handle prefix rules on current token, take as primary if unambiguous
55 55 primary, prefix = self._elements[token][1:3]
56 56 if primary and not (prefix and self._hasnewterm()):
57 57 expr = (primary, value)
58 58 elif prefix:
59 59 expr = (prefix[0], self._parseoperand(*prefix[1:]))
60 60 else:
61 61 raise error.ParseError(_("not a prefix: %s") % token, pos)
62 62 # gather tokens until we meet a lower binding strength
63 63 while bind < self._elements[self.current[0]][0]:
64 64 token, value, pos = self._advance()
65 65 # handle infix rules, take as suffix if unambiguous
66 66 infix, suffix = self._elements[token][3:]
67 67 if suffix and not (infix and self._hasnewterm()):
68 68 expr = (suffix, expr)
69 69 elif infix:
70 70 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
71 71 else:
72 72 raise error.ParseError(_("not an infix: %s") % token, pos)
73 73 return expr
74 74 def parse(self, tokeniter):
75 75 'generate a parse tree from tokens'
76 76 self._iter = tokeniter
77 77 self._advance()
78 78 res = self._parse()
79 79 token, value, pos = self.current
80 80 return res, pos
81 81 def eval(self, tree):
82 82 'recursively evaluate a parse tree using node methods'
83 83 if not isinstance(tree, tuple):
84 84 return tree
85 85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
86 86 def __call__(self, tokeniter):
87 87 'parse tokens into a parse tree and evaluate if methods given'
88 88 t = self.parse(tokeniter)
89 89 if self._methods:
90 90 return self.eval(t)
91 91 return t
92 92
93 93 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
94 94 """Build dict from list containing positional and keyword arguments
95 95
96 96 Invalid keywords or too many positional arguments are rejected, but
97 97 missing arguments are just omitted.
98 98 """
99 kwstart = next((i for i, x in enumerate(trees) if x[0] == keyvaluenode),
100 len(trees))
99 101 if len(trees) > len(keys):
100 102 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
101 103 % {'func': funcname, 'nargs': len(keys)})
102 104 args = {}
103 105 # consume positional arguments
104 for k, x in zip(keys, trees):
105 if x[0] == keyvaluenode:
106 break
106 for k, x in zip(keys, trees[:kwstart]):
107 107 args[k] = x
108 assert len(args) == kwstart
108 109 # remainder should be keyword arguments
109 for x in trees[len(args):]:
110 for x in trees[kwstart:]:
110 111 if x[0] != keyvaluenode or x[1][0] != keynode:
111 112 raise error.ParseError(_("%(func)s got an invalid argument")
112 113 % {'func': funcname})
113 114 k = x[1][1]
114 115 if k not in keys:
115 116 raise error.ParseError(_("%(func)s got an unexpected keyword "
116 117 "argument '%(key)s'")
117 118 % {'func': funcname, 'key': k})
118 119 if k in args:
119 120 raise error.ParseError(_("%(func)s got multiple values for keyword "
120 121 "argument '%(key)s'")
121 122 % {'func': funcname, 'key': k})
122 123 args[k] = x[2]
123 124 return args
124 125
125 126 def unescapestr(s):
126 127 try:
127 128 return s.decode("string_escape")
128 129 except ValueError as e:
129 130 # mangle Python's exception into our format
130 131 raise error.ParseError(str(e).lower())
131 132
132 133 def _prettyformat(tree, leafnodes, level, lines):
133 134 if not isinstance(tree, tuple) or tree[0] in leafnodes:
134 135 lines.append((level, str(tree)))
135 136 else:
136 137 lines.append((level, '(%s' % tree[0]))
137 138 for s in tree[1:]:
138 139 _prettyformat(s, leafnodes, level + 1, lines)
139 140 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
140 141
141 142 def prettyformat(tree, leafnodes):
142 143 lines = []
143 144 _prettyformat(tree, leafnodes, 0, lines)
144 145 output = '\n'.join((' ' * l + s) for l, s in lines)
145 146 return output
146 147
147 148 def simplifyinfixops(tree, targetnodes):
148 149 """Flatten chained infix operations to reduce usage of Python stack
149 150
150 151 >>> def f(tree):
151 152 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
152 153 >>> f(('or',
153 154 ... ('or',
154 155 ... ('symbol', '1'),
155 156 ... ('symbol', '2')),
156 157 ... ('symbol', '3')))
157 158 (or
158 159 ('symbol', '1')
159 160 ('symbol', '2')
160 161 ('symbol', '3'))
161 162 >>> f(('func',
162 163 ... ('symbol', 'p1'),
163 164 ... ('or',
164 165 ... ('or',
165 166 ... ('func',
166 167 ... ('symbol', 'sort'),
167 168 ... ('list',
168 169 ... ('or',
169 170 ... ('or',
170 171 ... ('symbol', '1'),
171 172 ... ('symbol', '2')),
172 173 ... ('symbol', '3')),
173 174 ... ('negate',
174 175 ... ('symbol', 'rev')))),
175 176 ... ('and',
176 177 ... ('symbol', '4'),
177 178 ... ('group',
178 179 ... ('or',
179 180 ... ('or',
180 181 ... ('symbol', '5'),
181 182 ... ('symbol', '6')),
182 183 ... ('symbol', '7'))))),
183 184 ... ('symbol', '8'))))
184 185 (func
185 186 ('symbol', 'p1')
186 187 (or
187 188 (func
188 189 ('symbol', 'sort')
189 190 (list
190 191 (or
191 192 ('symbol', '1')
192 193 ('symbol', '2')
193 194 ('symbol', '3'))
194 195 (negate
195 196 ('symbol', 'rev'))))
196 197 (and
197 198 ('symbol', '4')
198 199 (group
199 200 (or
200 201 ('symbol', '5')
201 202 ('symbol', '6')
202 203 ('symbol', '7'))))
203 204 ('symbol', '8')))
204 205 """
205 206 if not isinstance(tree, tuple):
206 207 return tree
207 208 op = tree[0]
208 209 if op not in targetnodes:
209 210 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
210 211
211 212 # walk down left nodes taking each right node. no recursion to left nodes
212 213 # because infix operators are left-associative, i.e. left tree is deep.
213 214 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
214 215 simplified = []
215 216 x = tree
216 217 while x[0] == op:
217 218 l, r = x[1:]
218 219 simplified.append(simplifyinfixops(r, targetnodes))
219 220 x = l
220 221 simplified.append(simplifyinfixops(x, targetnodes))
221 222 simplified.append(op)
222 223 return tuple(reversed(simplified))
223 224
224 225 def parseerrordetail(inst):
225 226 """Compose error message from specified ParseError object
226 227 """
227 228 if len(inst.args) > 1:
228 229 return _('at %s: %s') % (inst.args[1], inst.args[0])
229 230 else:
230 231 return inst.args[0]
231 232
232 233 class alias(object):
233 234 """Parsed result of alias"""
234 235
235 236 def __init__(self, name, args, err, replacement):
236 237 self.name = name
237 238 self.args = args
238 239 self.error = err
239 240 self.replacement = replacement
240 241 # whether own `error` information is already shown or not.
241 242 # this avoids showing same warning multiple times at each
242 243 # `expandaliases`.
243 244 self.warned = False
244 245
245 246 class basealiasrules(object):
246 247 """Parsing and expansion rule set of aliases
247 248
248 249 This is a helper for fileset/revset/template aliases. A concrete rule set
249 250 should be made by sub-classing this and implementing class/static methods.
250 251
251 252 It supports alias expansion of symbol and function-call styles::
252 253
253 254 # decl = defn
254 255 h = heads(default)
255 256 b($1) = ancestors($1) - ancestors(default)
256 257 """
257 258 # typically a config section, which will be included in error messages
258 259 _section = None
259 260 # tag of symbol node
260 261 _symbolnode = 'symbol'
261 262
262 263 def __new__(cls):
263 264 raise TypeError("'%s' is not instantiatable" % cls.__name__)
264 265
265 266 @staticmethod
266 267 def _parse(spec):
267 268 """Parse an alias name, arguments and definition"""
268 269 raise NotImplementedError
269 270
270 271 @staticmethod
271 272 def _trygetfunc(tree):
272 273 """Return (name, args) if tree is a function; otherwise None"""
273 274 raise NotImplementedError
274 275
275 276 @classmethod
276 277 def _builddecl(cls, decl):
277 278 """Parse an alias declaration into ``(name, args, errorstr)``
278 279
279 280 This function analyzes the parsed tree. The parsing rule is provided
280 281 by ``_parse()``.
281 282
282 283 - ``name``: of declared alias (may be ``decl`` itself at error)
283 284 - ``args``: list of argument names (or None for symbol declaration)
284 285 - ``errorstr``: detail about detected error (or None)
285 286
286 287 >>> sym = lambda x: ('symbol', x)
287 288 >>> symlist = lambda *xs: ('list',) + tuple(sym(x) for x in xs)
288 289 >>> func = lambda n, a: ('func', sym(n), a)
289 290 >>> parsemap = {
290 291 ... 'foo': sym('foo'),
291 292 ... '$foo': sym('$foo'),
292 293 ... 'foo::bar': ('dagrange', sym('foo'), sym('bar')),
293 294 ... 'foo()': func('foo', None),
294 295 ... '$foo()': func('$foo', None),
295 296 ... 'foo($1, $2)': func('foo', symlist('$1', '$2')),
296 297 ... 'foo(bar_bar, baz.baz)':
297 298 ... func('foo', symlist('bar_bar', 'baz.baz')),
298 299 ... 'foo(bar($1, $2))':
299 300 ... func('foo', func('bar', symlist('$1', '$2'))),
300 301 ... 'foo($1, $2, nested($1, $2))':
301 302 ... func('foo', (symlist('$1', '$2') +
302 303 ... (func('nested', symlist('$1', '$2')),))),
303 304 ... 'foo("bar")': func('foo', ('string', 'bar')),
304 305 ... 'foo($1, $2': error.ParseError('unexpected token: end', 10),
305 306 ... 'foo("bar': error.ParseError('unterminated string', 5),
306 307 ... 'foo($1, $2, $1)': func('foo', symlist('$1', '$2', '$1')),
307 308 ... }
308 309 >>> def parse(expr):
309 310 ... x = parsemap[expr]
310 311 ... if isinstance(x, Exception):
311 312 ... raise x
312 313 ... return x
313 314 >>> def trygetfunc(tree):
314 315 ... if not tree or tree[0] != 'func' or tree[1][0] != 'symbol':
315 316 ... return None
316 317 ... if not tree[2]:
317 318 ... return tree[1][1], []
318 319 ... if tree[2][0] == 'list':
319 320 ... return tree[1][1], list(tree[2][1:])
320 321 ... return tree[1][1], [tree[2]]
321 322 >>> class aliasrules(basealiasrules):
322 323 ... _parse = staticmethod(parse)
323 324 ... _trygetfunc = staticmethod(trygetfunc)
324 325 >>> builddecl = aliasrules._builddecl
325 326 >>> builddecl('foo')
326 327 ('foo', None, None)
327 328 >>> builddecl('$foo')
328 329 ('$foo', None, "invalid symbol '$foo'")
329 330 >>> builddecl('foo::bar')
330 331 ('foo::bar', None, 'invalid format')
331 332 >>> builddecl('foo()')
332 333 ('foo', [], None)
333 334 >>> builddecl('$foo()')
334 335 ('$foo()', None, "invalid function '$foo'")
335 336 >>> builddecl('foo($1, $2)')
336 337 ('foo', ['$1', '$2'], None)
337 338 >>> builddecl('foo(bar_bar, baz.baz)')
338 339 ('foo', ['bar_bar', 'baz.baz'], None)
339 340 >>> builddecl('foo($1, $2, nested($1, $2))')
340 341 ('foo($1, $2, nested($1, $2))', None, 'invalid argument list')
341 342 >>> builddecl('foo(bar($1, $2))')
342 343 ('foo(bar($1, $2))', None, 'invalid argument list')
343 344 >>> builddecl('foo("bar")')
344 345 ('foo("bar")', None, 'invalid argument list')
345 346 >>> builddecl('foo($1, $2')
346 347 ('foo($1, $2', None, 'at 10: unexpected token: end')
347 348 >>> builddecl('foo("bar')
348 349 ('foo("bar', None, 'at 5: unterminated string')
349 350 >>> builddecl('foo($1, $2, $1)')
350 351 ('foo', None, 'argument names collide with each other')
351 352 """
352 353 try:
353 354 tree = cls._parse(decl)
354 355 except error.ParseError as inst:
355 356 return (decl, None, parseerrordetail(inst))
356 357
357 358 if tree[0] == cls._symbolnode:
358 359 # "name = ...." style
359 360 name = tree[1]
360 361 if name.startswith('$'):
361 362 return (decl, None, _("invalid symbol '%s'") % name)
362 363 return (name, None, None)
363 364
364 365 func = cls._trygetfunc(tree)
365 366 if func:
366 367 # "name(arg, ....) = ...." style
367 368 name, args = func
368 369 if name.startswith('$'):
369 370 return (decl, None, _("invalid function '%s'") % name)
370 371 if any(t[0] != cls._symbolnode for t in args):
371 372 return (decl, None, _("invalid argument list"))
372 373 if len(args) != len(set(args)):
373 374 return (name, None, _("argument names collide with each other"))
374 375 return (name, [t[1] for t in args], None)
375 376
376 377 return (decl, None, _("invalid format"))
377 378
378 379 @classmethod
379 380 def _relabelargs(cls, tree, args):
380 381 """Mark alias arguments as ``_aliasarg``"""
381 382 if not isinstance(tree, tuple):
382 383 return tree
383 384 op = tree[0]
384 385 if op != cls._symbolnode:
385 386 return (op,) + tuple(cls._relabelargs(x, args) for x in tree[1:])
386 387
387 388 assert len(tree) == 2
388 389 sym = tree[1]
389 390 if sym in args:
390 391 op = '_aliasarg'
391 392 elif sym.startswith('$'):
392 393 raise error.ParseError(_("invalid symbol '%s'") % sym)
393 394 return (op, sym)
394 395
395 396 @classmethod
396 397 def _builddefn(cls, defn, args):
397 398 """Parse an alias definition into a tree and marks substitutions
398 399
399 400 This function marks alias argument references as ``_aliasarg``. The
400 401 parsing rule is provided by ``_parse()``.
401 402
402 403 ``args`` is a list of alias argument names, or None if the alias
403 404 is declared as a symbol.
404 405
405 406 >>> parsemap = {
406 407 ... '$1 or foo': ('or', ('symbol', '$1'), ('symbol', 'foo')),
407 408 ... '$1 or $bar': ('or', ('symbol', '$1'), ('symbol', '$bar')),
408 409 ... '$10 or baz': ('or', ('symbol', '$10'), ('symbol', 'baz')),
409 410 ... '"$1" or "foo"': ('or', ('string', '$1'), ('string', 'foo')),
410 411 ... }
411 412 >>> class aliasrules(basealiasrules):
412 413 ... _parse = staticmethod(parsemap.__getitem__)
413 414 ... _trygetfunc = staticmethod(lambda x: None)
414 415 >>> builddefn = aliasrules._builddefn
415 416 >>> def pprint(tree):
416 417 ... print prettyformat(tree, ('_aliasarg', 'string', 'symbol'))
417 418 >>> args = ['$1', '$2', 'foo']
418 419 >>> pprint(builddefn('$1 or foo', args))
419 420 (or
420 421 ('_aliasarg', '$1')
421 422 ('_aliasarg', 'foo'))
422 423 >>> try:
423 424 ... builddefn('$1 or $bar', args)
424 425 ... except error.ParseError as inst:
425 426 ... print parseerrordetail(inst)
426 427 invalid symbol '$bar'
427 428 >>> args = ['$1', '$10', 'foo']
428 429 >>> pprint(builddefn('$10 or baz', args))
429 430 (or
430 431 ('_aliasarg', '$10')
431 432 ('symbol', 'baz'))
432 433 >>> pprint(builddefn('"$1" or "foo"', args))
433 434 (or
434 435 ('string', '$1')
435 436 ('string', 'foo'))
436 437 """
437 438 tree = cls._parse(defn)
438 439 if args:
439 440 args = set(args)
440 441 else:
441 442 args = set()
442 443 return cls._relabelargs(tree, args)
443 444
444 445 @classmethod
445 446 def build(cls, decl, defn):
446 447 """Parse an alias declaration and definition into an alias object"""
447 448 repl = efmt = None
448 449 name, args, err = cls._builddecl(decl)
449 450 if err:
450 451 efmt = _('bad declaration of %(section)s "%(name)s": %(error)s')
451 452 else:
452 453 try:
453 454 repl = cls._builddefn(defn, args)
454 455 except error.ParseError as inst:
455 456 err = parseerrordetail(inst)
456 457 efmt = _('bad definition of %(section)s "%(name)s": %(error)s')
457 458 if err:
458 459 err = efmt % {'section': cls._section, 'name': name, 'error': err}
459 460 return alias(name, args, err, repl)
460 461
461 462 @classmethod
462 463 def buildmap(cls, items):
463 464 """Parse a list of alias (name, replacement) pairs into a dict of
464 465 alias objects"""
465 466 aliases = {}
466 467 for decl, defn in items:
467 468 a = cls.build(decl, defn)
468 469 aliases[a.name] = a
469 470 return aliases
470 471
471 472 @classmethod
472 473 def _getalias(cls, aliases, tree):
473 474 """If tree looks like an unexpanded alias, return (alias, pattern-args)
474 475 pair. Return None otherwise.
475 476 """
476 477 if not isinstance(tree, tuple):
477 478 return None
478 479 if tree[0] == cls._symbolnode:
479 480 name = tree[1]
480 481 a = aliases.get(name)
481 482 if a and a.args is None:
482 483 return a, None
483 484 func = cls._trygetfunc(tree)
484 485 if func:
485 486 name, args = func
486 487 a = aliases.get(name)
487 488 if a and a.args is not None:
488 489 return a, args
489 490 return None
490 491
491 492 @classmethod
492 493 def _expandargs(cls, tree, args):
493 494 """Replace _aliasarg instances with the substitution value of the
494 495 same name in args, recursively.
495 496 """
496 497 if not isinstance(tree, tuple):
497 498 return tree
498 499 if tree[0] == '_aliasarg':
499 500 sym = tree[1]
500 501 return args[sym]
501 502 return tuple(cls._expandargs(t, args) for t in tree)
502 503
503 504 @classmethod
504 505 def _expand(cls, aliases, tree, expanding, cache):
505 506 if not isinstance(tree, tuple):
506 507 return tree
507 508 r = cls._getalias(aliases, tree)
508 509 if r is None:
509 510 return tuple(cls._expand(aliases, t, expanding, cache)
510 511 for t in tree)
511 512 a, l = r
512 513 if a.error:
513 514 raise error.Abort(a.error)
514 515 if a in expanding:
515 516 raise error.ParseError(_('infinite expansion of %(section)s '
516 517 '"%(name)s" detected')
517 518 % {'section': cls._section, 'name': a.name})
518 519 # get cacheable replacement tree by expanding aliases recursively
519 520 expanding.append(a)
520 521 if a.name not in cache:
521 522 cache[a.name] = cls._expand(aliases, a.replacement, expanding,
522 523 cache)
523 524 result = cache[a.name]
524 525 expanding.pop()
525 526 if a.args is None:
526 527 return result
527 528 # substitute function arguments in replacement tree
528 529 if len(l) != len(a.args):
529 530 raise error.ParseError(_('invalid number of arguments: %d')
530 531 % len(l))
531 532 l = [cls._expand(aliases, t, [], cache) for t in l]
532 533 return cls._expandargs(result, dict(zip(a.args, l)))
533 534
534 535 @classmethod
535 536 def expand(cls, aliases, tree):
536 537 """Expand aliases in tree, recursively.
537 538
538 539 'aliases' is a dictionary mapping user defined aliases to alias objects.
539 540 """
540 541 return cls._expand(aliases, tree, [], {})
General Comments 0
You need to be logged in to leave comments. Login now