##// END OF EJS Templates
parser: use %d instead of %s for interpolating error position...
Augie Fackler -
r31353:089e3780 default
parent child Browse files
Show More
@@ -1,580 +1,580 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 15 # and suffix actions
16 16 # an action is a tree node name, a tree label, and an optional match
17 17 # __call__(program) parses program into a labeled tree
18 18
19 19 from __future__ import absolute_import
20 20
21 21 from .i18n import _
22 22 from . import error
23 23
24 24 class parser(object):
25 25 def __init__(self, elements, methods=None):
26 26 self._elements = elements
27 27 self._methods = methods
28 28 self.current = None
29 29 def _advance(self):
30 30 'advance the tokenizer'
31 31 t = self.current
32 32 self.current = next(self._iter, None)
33 33 return t
34 34 def _hasnewterm(self):
35 35 'True if next token may start new term'
36 36 return any(self._elements[self.current[0]][1:3])
37 37 def _match(self, m):
38 38 'make sure the tokenizer matches an end condition'
39 39 if self.current[0] != m:
40 40 raise error.ParseError(_("unexpected token: %s") % self.current[0],
41 41 self.current[2])
42 42 self._advance()
43 43 def _parseoperand(self, bind, m=None):
44 44 'gather right-hand-side operand until an end condition or binding met'
45 45 if m and self.current[0] == m:
46 46 expr = None
47 47 else:
48 48 expr = self._parse(bind)
49 49 if m:
50 50 self._match(m)
51 51 return expr
52 52 def _parse(self, bind=0):
53 53 token, value, pos = self._advance()
54 54 # handle prefix rules on current token, take as primary if unambiguous
55 55 primary, prefix = self._elements[token][1:3]
56 56 if primary and not (prefix and self._hasnewterm()):
57 57 expr = (primary, value)
58 58 elif prefix:
59 59 expr = (prefix[0], self._parseoperand(*prefix[1:]))
60 60 else:
61 61 raise error.ParseError(_("not a prefix: %s") % token, pos)
62 62 # gather tokens until we meet a lower binding strength
63 63 while bind < self._elements[self.current[0]][0]:
64 64 token, value, pos = self._advance()
65 65 # handle infix rules, take as suffix if unambiguous
66 66 infix, suffix = self._elements[token][3:]
67 67 if suffix and not (infix and self._hasnewterm()):
68 68 expr = (suffix, expr)
69 69 elif infix:
70 70 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
71 71 else:
72 72 raise error.ParseError(_("not an infix: %s") % token, pos)
73 73 return expr
74 74 def parse(self, tokeniter):
75 75 'generate a parse tree from tokens'
76 76 self._iter = tokeniter
77 77 self._advance()
78 78 res = self._parse()
79 79 token, value, pos = self.current
80 80 return res, pos
81 81 def eval(self, tree):
82 82 'recursively evaluate a parse tree using node methods'
83 83 if not isinstance(tree, tuple):
84 84 return tree
85 85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
86 86 def __call__(self, tokeniter):
87 87 'parse tokens into a parse tree and evaluate if methods given'
88 88 t = self.parse(tokeniter)
89 89 if self._methods:
90 90 return self.eval(t)
91 91 return t
92 92
93 93 def splitargspec(spec):
94 94 """Parse spec of function arguments into (poskeys, varkey, keys)
95 95
96 96 >>> splitargspec('')
97 97 ([], None, [])
98 98 >>> splitargspec('foo bar')
99 99 ([], None, ['foo', 'bar'])
100 100 >>> splitargspec('foo *bar baz')
101 101 (['foo'], 'bar', ['baz'])
102 102 >>> splitargspec('*foo')
103 103 ([], 'foo', [])
104 104 """
105 105 pre, sep, post = spec.partition('*')
106 106 pres = pre.split()
107 107 posts = post.split()
108 108 if sep:
109 109 if not posts:
110 110 raise error.ProgrammingError('no *varkey name provided')
111 111 return pres, posts[0], posts[1:]
112 112 return [], None, pres
113 113
114 114 def buildargsdict(trees, funcname, argspec, keyvaluenode, keynode):
115 115 """Build dict from list containing positional and keyword arguments
116 116
117 117 Arguments are specified by a tuple of ``(poskeys, varkey, keys)`` where
118 118
119 119 - ``poskeys``: list of names of positional arguments
120 120 - ``varkey``: optional argument name that takes up remainder
121 121 - ``keys``: list of names that can be either positional or keyword arguments
122 122
123 123 If ``varkey`` specified, all ``keys`` must be given as keyword arguments.
124 124
125 125 Invalid keywords, too few positional arguments, or too many positional
126 126 arguments are rejected, but missing keyword arguments are just omitted.
127 127 """
128 128 poskeys, varkey, keys = argspec
129 129 kwstart = next((i for i, x in enumerate(trees) if x[0] == keyvaluenode),
130 130 len(trees))
131 131 if kwstart < len(poskeys):
132 132 raise error.ParseError(_("%(func)s takes at least %(nargs)d positional "
133 133 "arguments")
134 134 % {'func': funcname, 'nargs': len(poskeys)})
135 135 if not varkey and len(trees) > len(poskeys) + len(keys):
136 136 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
137 137 % {'func': funcname,
138 138 'nargs': len(poskeys) + len(keys)})
139 139 args = {}
140 140 # consume positional arguments
141 141 for k, x in zip(poskeys, trees[:kwstart]):
142 142 args[k] = x
143 143 if varkey:
144 144 args[varkey] = trees[len(args):kwstart]
145 145 else:
146 146 for k, x in zip(keys, trees[len(args):kwstart]):
147 147 args[k] = x
148 148 # remainder should be keyword arguments
149 149 for x in trees[kwstart:]:
150 150 if x[0] != keyvaluenode or x[1][0] != keynode:
151 151 raise error.ParseError(_("%(func)s got an invalid argument")
152 152 % {'func': funcname})
153 153 k = x[1][1]
154 154 if k not in keys:
155 155 raise error.ParseError(_("%(func)s got an unexpected keyword "
156 156 "argument '%(key)s'")
157 157 % {'func': funcname, 'key': k})
158 158 if k in args:
159 159 raise error.ParseError(_("%(func)s got multiple values for keyword "
160 160 "argument '%(key)s'")
161 161 % {'func': funcname, 'key': k})
162 162 args[k] = x[2]
163 163 return args
164 164
165 165 def unescapestr(s):
166 166 try:
167 167 return s.decode("string_escape")
168 168 except ValueError as e:
169 169 # mangle Python's exception into our format
170 170 raise error.ParseError(str(e).lower())
171 171
172 172 def _prettyformat(tree, leafnodes, level, lines):
173 173 if not isinstance(tree, tuple) or tree[0] in leafnodes:
174 174 lines.append((level, str(tree)))
175 175 else:
176 176 lines.append((level, '(%s' % tree[0]))
177 177 for s in tree[1:]:
178 178 _prettyformat(s, leafnodes, level + 1, lines)
179 179 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
180 180
181 181 def prettyformat(tree, leafnodes):
182 182 lines = []
183 183 _prettyformat(tree, leafnodes, 0, lines)
184 184 output = '\n'.join((' ' * l + s) for l, s in lines)
185 185 return output
186 186
187 187 def simplifyinfixops(tree, targetnodes):
188 188 """Flatten chained infix operations to reduce usage of Python stack
189 189
190 190 >>> def f(tree):
191 191 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
192 192 >>> f(('or',
193 193 ... ('or',
194 194 ... ('symbol', '1'),
195 195 ... ('symbol', '2')),
196 196 ... ('symbol', '3')))
197 197 (or
198 198 ('symbol', '1')
199 199 ('symbol', '2')
200 200 ('symbol', '3'))
201 201 >>> f(('func',
202 202 ... ('symbol', 'p1'),
203 203 ... ('or',
204 204 ... ('or',
205 205 ... ('func',
206 206 ... ('symbol', 'sort'),
207 207 ... ('list',
208 208 ... ('or',
209 209 ... ('or',
210 210 ... ('symbol', '1'),
211 211 ... ('symbol', '2')),
212 212 ... ('symbol', '3')),
213 213 ... ('negate',
214 214 ... ('symbol', 'rev')))),
215 215 ... ('and',
216 216 ... ('symbol', '4'),
217 217 ... ('group',
218 218 ... ('or',
219 219 ... ('or',
220 220 ... ('symbol', '5'),
221 221 ... ('symbol', '6')),
222 222 ... ('symbol', '7'))))),
223 223 ... ('symbol', '8'))))
224 224 (func
225 225 ('symbol', 'p1')
226 226 (or
227 227 (func
228 228 ('symbol', 'sort')
229 229 (list
230 230 (or
231 231 ('symbol', '1')
232 232 ('symbol', '2')
233 233 ('symbol', '3'))
234 234 (negate
235 235 ('symbol', 'rev'))))
236 236 (and
237 237 ('symbol', '4')
238 238 (group
239 239 (or
240 240 ('symbol', '5')
241 241 ('symbol', '6')
242 242 ('symbol', '7'))))
243 243 ('symbol', '8')))
244 244 """
245 245 if not isinstance(tree, tuple):
246 246 return tree
247 247 op = tree[0]
248 248 if op not in targetnodes:
249 249 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
250 250
251 251 # walk down left nodes taking each right node. no recursion to left nodes
252 252 # because infix operators are left-associative, i.e. left tree is deep.
253 253 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
254 254 simplified = []
255 255 x = tree
256 256 while x[0] == op:
257 257 l, r = x[1:]
258 258 simplified.append(simplifyinfixops(r, targetnodes))
259 259 x = l
260 260 simplified.append(simplifyinfixops(x, targetnodes))
261 261 simplified.append(op)
262 262 return tuple(reversed(simplified))
263 263
264 264 def parseerrordetail(inst):
265 265 """Compose error message from specified ParseError object
266 266 """
267 267 if len(inst.args) > 1:
268 return _('at %s: %s') % (inst.args[1], inst.args[0])
268 return _('at %d: %s') % (inst.args[1], inst.args[0])
269 269 else:
270 270 return inst.args[0]
271 271
272 272 class alias(object):
273 273 """Parsed result of alias"""
274 274
275 275 def __init__(self, name, args, err, replacement):
276 276 self.name = name
277 277 self.args = args
278 278 self.error = err
279 279 self.replacement = replacement
280 280 # whether own `error` information is already shown or not.
281 281 # this avoids showing same warning multiple times at each
282 282 # `expandaliases`.
283 283 self.warned = False
284 284
285 285 class basealiasrules(object):
286 286 """Parsing and expansion rule set of aliases
287 287
288 288 This is a helper for fileset/revset/template aliases. A concrete rule set
289 289 should be made by sub-classing this and implementing class/static methods.
290 290
291 291 It supports alias expansion of symbol and function-call styles::
292 292
293 293 # decl = defn
294 294 h = heads(default)
295 295 b($1) = ancestors($1) - ancestors(default)
296 296 """
297 297 # typically a config section, which will be included in error messages
298 298 _section = None
299 299 # tag of symbol node
300 300 _symbolnode = 'symbol'
301 301
302 302 def __new__(cls):
303 303 raise TypeError("'%s' is not instantiatable" % cls.__name__)
304 304
305 305 @staticmethod
306 306 def _parse(spec):
307 307 """Parse an alias name, arguments and definition"""
308 308 raise NotImplementedError
309 309
310 310 @staticmethod
311 311 def _trygetfunc(tree):
312 312 """Return (name, args) if tree is a function; otherwise None"""
313 313 raise NotImplementedError
314 314
315 315 @classmethod
316 316 def _builddecl(cls, decl):
317 317 """Parse an alias declaration into ``(name, args, errorstr)``
318 318
319 319 This function analyzes the parsed tree. The parsing rule is provided
320 320 by ``_parse()``.
321 321
322 322 - ``name``: of declared alias (may be ``decl`` itself at error)
323 323 - ``args``: list of argument names (or None for symbol declaration)
324 324 - ``errorstr``: detail about detected error (or None)
325 325
326 326 >>> sym = lambda x: ('symbol', x)
327 327 >>> symlist = lambda *xs: ('list',) + tuple(sym(x) for x in xs)
328 328 >>> func = lambda n, a: ('func', sym(n), a)
329 329 >>> parsemap = {
330 330 ... 'foo': sym('foo'),
331 331 ... '$foo': sym('$foo'),
332 332 ... 'foo::bar': ('dagrange', sym('foo'), sym('bar')),
333 333 ... 'foo()': func('foo', None),
334 334 ... '$foo()': func('$foo', None),
335 335 ... 'foo($1, $2)': func('foo', symlist('$1', '$2')),
336 336 ... 'foo(bar_bar, baz.baz)':
337 337 ... func('foo', symlist('bar_bar', 'baz.baz')),
338 338 ... 'foo(bar($1, $2))':
339 339 ... func('foo', func('bar', symlist('$1', '$2'))),
340 340 ... 'foo($1, $2, nested($1, $2))':
341 341 ... func('foo', (symlist('$1', '$2') +
342 342 ... (func('nested', symlist('$1', '$2')),))),
343 343 ... 'foo("bar")': func('foo', ('string', 'bar')),
344 344 ... 'foo($1, $2': error.ParseError('unexpected token: end', 10),
345 345 ... 'foo("bar': error.ParseError('unterminated string', 5),
346 346 ... 'foo($1, $2, $1)': func('foo', symlist('$1', '$2', '$1')),
347 347 ... }
348 348 >>> def parse(expr):
349 349 ... x = parsemap[expr]
350 350 ... if isinstance(x, Exception):
351 351 ... raise x
352 352 ... return x
353 353 >>> def trygetfunc(tree):
354 354 ... if not tree or tree[0] != 'func' or tree[1][0] != 'symbol':
355 355 ... return None
356 356 ... if not tree[2]:
357 357 ... return tree[1][1], []
358 358 ... if tree[2][0] == 'list':
359 359 ... return tree[1][1], list(tree[2][1:])
360 360 ... return tree[1][1], [tree[2]]
361 361 >>> class aliasrules(basealiasrules):
362 362 ... _parse = staticmethod(parse)
363 363 ... _trygetfunc = staticmethod(trygetfunc)
364 364 >>> builddecl = aliasrules._builddecl
365 365 >>> builddecl('foo')
366 366 ('foo', None, None)
367 367 >>> builddecl('$foo')
368 368 ('$foo', None, "invalid symbol '$foo'")
369 369 >>> builddecl('foo::bar')
370 370 ('foo::bar', None, 'invalid format')
371 371 >>> builddecl('foo()')
372 372 ('foo', [], None)
373 373 >>> builddecl('$foo()')
374 374 ('$foo()', None, "invalid function '$foo'")
375 375 >>> builddecl('foo($1, $2)')
376 376 ('foo', ['$1', '$2'], None)
377 377 >>> builddecl('foo(bar_bar, baz.baz)')
378 378 ('foo', ['bar_bar', 'baz.baz'], None)
379 379 >>> builddecl('foo($1, $2, nested($1, $2))')
380 380 ('foo($1, $2, nested($1, $2))', None, 'invalid argument list')
381 381 >>> builddecl('foo(bar($1, $2))')
382 382 ('foo(bar($1, $2))', None, 'invalid argument list')
383 383 >>> builddecl('foo("bar")')
384 384 ('foo("bar")', None, 'invalid argument list')
385 385 >>> builddecl('foo($1, $2')
386 386 ('foo($1, $2', None, 'at 10: unexpected token: end')
387 387 >>> builddecl('foo("bar')
388 388 ('foo("bar', None, 'at 5: unterminated string')
389 389 >>> builddecl('foo($1, $2, $1)')
390 390 ('foo', None, 'argument names collide with each other')
391 391 """
392 392 try:
393 393 tree = cls._parse(decl)
394 394 except error.ParseError as inst:
395 395 return (decl, None, parseerrordetail(inst))
396 396
397 397 if tree[0] == cls._symbolnode:
398 398 # "name = ...." style
399 399 name = tree[1]
400 400 if name.startswith('$'):
401 401 return (decl, None, _("invalid symbol '%s'") % name)
402 402 return (name, None, None)
403 403
404 404 func = cls._trygetfunc(tree)
405 405 if func:
406 406 # "name(arg, ....) = ...." style
407 407 name, args = func
408 408 if name.startswith('$'):
409 409 return (decl, None, _("invalid function '%s'") % name)
410 410 if any(t[0] != cls._symbolnode for t in args):
411 411 return (decl, None, _("invalid argument list"))
412 412 if len(args) != len(set(args)):
413 413 return (name, None, _("argument names collide with each other"))
414 414 return (name, [t[1] for t in args], None)
415 415
416 416 return (decl, None, _("invalid format"))
417 417
418 418 @classmethod
419 419 def _relabelargs(cls, tree, args):
420 420 """Mark alias arguments as ``_aliasarg``"""
421 421 if not isinstance(tree, tuple):
422 422 return tree
423 423 op = tree[0]
424 424 if op != cls._symbolnode:
425 425 return (op,) + tuple(cls._relabelargs(x, args) for x in tree[1:])
426 426
427 427 assert len(tree) == 2
428 428 sym = tree[1]
429 429 if sym in args:
430 430 op = '_aliasarg'
431 431 elif sym.startswith('$'):
432 432 raise error.ParseError(_("invalid symbol '%s'") % sym)
433 433 return (op, sym)
434 434
435 435 @classmethod
436 436 def _builddefn(cls, defn, args):
437 437 """Parse an alias definition into a tree and marks substitutions
438 438
439 439 This function marks alias argument references as ``_aliasarg``. The
440 440 parsing rule is provided by ``_parse()``.
441 441
442 442 ``args`` is a list of alias argument names, or None if the alias
443 443 is declared as a symbol.
444 444
445 445 >>> parsemap = {
446 446 ... '$1 or foo': ('or', ('symbol', '$1'), ('symbol', 'foo')),
447 447 ... '$1 or $bar': ('or', ('symbol', '$1'), ('symbol', '$bar')),
448 448 ... '$10 or baz': ('or', ('symbol', '$10'), ('symbol', 'baz')),
449 449 ... '"$1" or "foo"': ('or', ('string', '$1'), ('string', 'foo')),
450 450 ... }
451 451 >>> class aliasrules(basealiasrules):
452 452 ... _parse = staticmethod(parsemap.__getitem__)
453 453 ... _trygetfunc = staticmethod(lambda x: None)
454 454 >>> builddefn = aliasrules._builddefn
455 455 >>> def pprint(tree):
456 456 ... print prettyformat(tree, ('_aliasarg', 'string', 'symbol'))
457 457 >>> args = ['$1', '$2', 'foo']
458 458 >>> pprint(builddefn('$1 or foo', args))
459 459 (or
460 460 ('_aliasarg', '$1')
461 461 ('_aliasarg', 'foo'))
462 462 >>> try:
463 463 ... builddefn('$1 or $bar', args)
464 464 ... except error.ParseError as inst:
465 465 ... print parseerrordetail(inst)
466 466 invalid symbol '$bar'
467 467 >>> args = ['$1', '$10', 'foo']
468 468 >>> pprint(builddefn('$10 or baz', args))
469 469 (or
470 470 ('_aliasarg', '$10')
471 471 ('symbol', 'baz'))
472 472 >>> pprint(builddefn('"$1" or "foo"', args))
473 473 (or
474 474 ('string', '$1')
475 475 ('string', 'foo'))
476 476 """
477 477 tree = cls._parse(defn)
478 478 if args:
479 479 args = set(args)
480 480 else:
481 481 args = set()
482 482 return cls._relabelargs(tree, args)
483 483
484 484 @classmethod
485 485 def build(cls, decl, defn):
486 486 """Parse an alias declaration and definition into an alias object"""
487 487 repl = efmt = None
488 488 name, args, err = cls._builddecl(decl)
489 489 if err:
490 490 efmt = _('bad declaration of %(section)s "%(name)s": %(error)s')
491 491 else:
492 492 try:
493 493 repl = cls._builddefn(defn, args)
494 494 except error.ParseError as inst:
495 495 err = parseerrordetail(inst)
496 496 efmt = _('bad definition of %(section)s "%(name)s": %(error)s')
497 497 if err:
498 498 err = efmt % {'section': cls._section, 'name': name, 'error': err}
499 499 return alias(name, args, err, repl)
500 500
501 501 @classmethod
502 502 def buildmap(cls, items):
503 503 """Parse a list of alias (name, replacement) pairs into a dict of
504 504 alias objects"""
505 505 aliases = {}
506 506 for decl, defn in items:
507 507 a = cls.build(decl, defn)
508 508 aliases[a.name] = a
509 509 return aliases
510 510
511 511 @classmethod
512 512 def _getalias(cls, aliases, tree):
513 513 """If tree looks like an unexpanded alias, return (alias, pattern-args)
514 514 pair. Return None otherwise.
515 515 """
516 516 if not isinstance(tree, tuple):
517 517 return None
518 518 if tree[0] == cls._symbolnode:
519 519 name = tree[1]
520 520 a = aliases.get(name)
521 521 if a and a.args is None:
522 522 return a, None
523 523 func = cls._trygetfunc(tree)
524 524 if func:
525 525 name, args = func
526 526 a = aliases.get(name)
527 527 if a and a.args is not None:
528 528 return a, args
529 529 return None
530 530
531 531 @classmethod
532 532 def _expandargs(cls, tree, args):
533 533 """Replace _aliasarg instances with the substitution value of the
534 534 same name in args, recursively.
535 535 """
536 536 if not isinstance(tree, tuple):
537 537 return tree
538 538 if tree[0] == '_aliasarg':
539 539 sym = tree[1]
540 540 return args[sym]
541 541 return tuple(cls._expandargs(t, args) for t in tree)
542 542
543 543 @classmethod
544 544 def _expand(cls, aliases, tree, expanding, cache):
545 545 if not isinstance(tree, tuple):
546 546 return tree
547 547 r = cls._getalias(aliases, tree)
548 548 if r is None:
549 549 return tuple(cls._expand(aliases, t, expanding, cache)
550 550 for t in tree)
551 551 a, l = r
552 552 if a.error:
553 553 raise error.Abort(a.error)
554 554 if a in expanding:
555 555 raise error.ParseError(_('infinite expansion of %(section)s '
556 556 '"%(name)s" detected')
557 557 % {'section': cls._section, 'name': a.name})
558 558 # get cacheable replacement tree by expanding aliases recursively
559 559 expanding.append(a)
560 560 if a.name not in cache:
561 561 cache[a.name] = cls._expand(aliases, a.replacement, expanding,
562 562 cache)
563 563 result = cache[a.name]
564 564 expanding.pop()
565 565 if a.args is None:
566 566 return result
567 567 # substitute function arguments in replacement tree
568 568 if len(l) != len(a.args):
569 569 raise error.ParseError(_('invalid number of arguments: %d')
570 570 % len(l))
571 571 l = [cls._expand(aliases, t, [], cache) for t in l]
572 572 return cls._expandargs(result, dict(zip(a.args, l)))
573 573
574 574 @classmethod
575 575 def expand(cls, aliases, tree):
576 576 """Expand aliases in tree, recursively.
577 577
578 578 'aliases' is a dictionary mapping user defined aliases to alias objects.
579 579 """
580 580 return cls._expand(aliases, tree, [], {})
General Comments 0
You need to be logged in to leave comments. Login now