##// END OF EJS Templates
parser: drop redundant comparison between alias declaration tree and pattern...
Yuya Nishihara -
r28908:7a772def default
parent child Browse files
Show More
@@ -1,545 +1,542 b''
1 1 # parser.py - simple top-down operator precedence parser for mercurial
2 2 #
3 3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
9 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
10 10 # for background
11 11
12 12 # takes a tokenizer and elements
13 13 # tokenizer is an iterator that returns (type, value, pos) tuples
14 14 # elements is a mapping of types to binding strength, primary, prefix, infix
15 15 # and suffix actions
16 16 # an action is a tree node name, a tree label, and an optional match
17 17 # __call__(program) parses program into a labeled tree
18 18
19 19 from __future__ import absolute_import
20 20
21 21 from .i18n import _
22 22 from . import error
23 23
24 24 class parser(object):
25 25 def __init__(self, elements, methods=None):
26 26 self._elements = elements
27 27 self._methods = methods
28 28 self.current = None
29 29 def _advance(self):
30 30 'advance the tokenizer'
31 31 t = self.current
32 32 self.current = next(self._iter, None)
33 33 return t
34 34 def _hasnewterm(self):
35 35 'True if next token may start new term'
36 36 return any(self._elements[self.current[0]][1:3])
37 37 def _match(self, m):
38 38 'make sure the tokenizer matches an end condition'
39 39 if self.current[0] != m:
40 40 raise error.ParseError(_("unexpected token: %s") % self.current[0],
41 41 self.current[2])
42 42 self._advance()
43 43 def _parseoperand(self, bind, m=None):
44 44 'gather right-hand-side operand until an end condition or binding met'
45 45 if m and self.current[0] == m:
46 46 expr = None
47 47 else:
48 48 expr = self._parse(bind)
49 49 if m:
50 50 self._match(m)
51 51 return expr
52 52 def _parse(self, bind=0):
53 53 token, value, pos = self._advance()
54 54 # handle prefix rules on current token, take as primary if unambiguous
55 55 primary, prefix = self._elements[token][1:3]
56 56 if primary and not (prefix and self._hasnewterm()):
57 57 expr = (primary, value)
58 58 elif prefix:
59 59 expr = (prefix[0], self._parseoperand(*prefix[1:]))
60 60 else:
61 61 raise error.ParseError(_("not a prefix: %s") % token, pos)
62 62 # gather tokens until we meet a lower binding strength
63 63 while bind < self._elements[self.current[0]][0]:
64 64 token, value, pos = self._advance()
65 65 # handle infix rules, take as suffix if unambiguous
66 66 infix, suffix = self._elements[token][3:]
67 67 if suffix and not (infix and self._hasnewterm()):
68 68 expr = (suffix[0], expr)
69 69 elif infix:
70 70 expr = (infix[0], expr, self._parseoperand(*infix[1:]))
71 71 else:
72 72 raise error.ParseError(_("not an infix: %s") % token, pos)
73 73 return expr
74 74 def parse(self, tokeniter):
75 75 'generate a parse tree from tokens'
76 76 self._iter = tokeniter
77 77 self._advance()
78 78 res = self._parse()
79 79 token, value, pos = self.current
80 80 return res, pos
81 81 def eval(self, tree):
82 82 'recursively evaluate a parse tree using node methods'
83 83 if not isinstance(tree, tuple):
84 84 return tree
85 85 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
86 86 def __call__(self, tokeniter):
87 87 'parse tokens into a parse tree and evaluate if methods given'
88 88 t = self.parse(tokeniter)
89 89 if self._methods:
90 90 return self.eval(t)
91 91 return t
92 92
93 93 def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
94 94 """Build dict from list containing positional and keyword arguments
95 95
96 96 Invalid keywords or too many positional arguments are rejected, but
97 97 missing arguments are just omitted.
98 98 """
99 99 if len(trees) > len(keys):
100 100 raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
101 101 % {'func': funcname, 'nargs': len(keys)})
102 102 args = {}
103 103 # consume positional arguments
104 104 for k, x in zip(keys, trees):
105 105 if x[0] == keyvaluenode:
106 106 break
107 107 args[k] = x
108 108 # remainder should be keyword arguments
109 109 for x in trees[len(args):]:
110 110 if x[0] != keyvaluenode or x[1][0] != keynode:
111 111 raise error.ParseError(_("%(func)s got an invalid argument")
112 112 % {'func': funcname})
113 113 k = x[1][1]
114 114 if k not in keys:
115 115 raise error.ParseError(_("%(func)s got an unexpected keyword "
116 116 "argument '%(key)s'")
117 117 % {'func': funcname, 'key': k})
118 118 if k in args:
119 119 raise error.ParseError(_("%(func)s got multiple values for keyword "
120 120 "argument '%(key)s'")
121 121 % {'func': funcname, 'key': k})
122 122 args[k] = x[2]
123 123 return args
124 124
125 125 def unescapestr(s):
126 126 try:
127 127 return s.decode("string_escape")
128 128 except ValueError as e:
129 129 # mangle Python's exception into our format
130 130 raise error.ParseError(str(e).lower())
131 131
132 132 def _prettyformat(tree, leafnodes, level, lines):
133 133 if not isinstance(tree, tuple) or tree[0] in leafnodes:
134 134 lines.append((level, str(tree)))
135 135 else:
136 136 lines.append((level, '(%s' % tree[0]))
137 137 for s in tree[1:]:
138 138 _prettyformat(s, leafnodes, level + 1, lines)
139 139 lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
140 140
141 141 def prettyformat(tree, leafnodes):
142 142 lines = []
143 143 _prettyformat(tree, leafnodes, 0, lines)
144 144 output = '\n'.join((' ' * l + s) for l, s in lines)
145 145 return output
146 146
147 147 def simplifyinfixops(tree, targetnodes):
148 148 """Flatten chained infix operations to reduce usage of Python stack
149 149
150 150 >>> def f(tree):
151 151 ... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
152 152 >>> f(('or',
153 153 ... ('or',
154 154 ... ('symbol', '1'),
155 155 ... ('symbol', '2')),
156 156 ... ('symbol', '3')))
157 157 (or
158 158 ('symbol', '1')
159 159 ('symbol', '2')
160 160 ('symbol', '3'))
161 161 >>> f(('func',
162 162 ... ('symbol', 'p1'),
163 163 ... ('or',
164 164 ... ('or',
165 165 ... ('func',
166 166 ... ('symbol', 'sort'),
167 167 ... ('list',
168 168 ... ('or',
169 169 ... ('or',
170 170 ... ('symbol', '1'),
171 171 ... ('symbol', '2')),
172 172 ... ('symbol', '3')),
173 173 ... ('negate',
174 174 ... ('symbol', 'rev')))),
175 175 ... ('and',
176 176 ... ('symbol', '4'),
177 177 ... ('group',
178 178 ... ('or',
179 179 ... ('or',
180 180 ... ('symbol', '5'),
181 181 ... ('symbol', '6')),
182 182 ... ('symbol', '7'))))),
183 183 ... ('symbol', '8'))))
184 184 (func
185 185 ('symbol', 'p1')
186 186 (or
187 187 (func
188 188 ('symbol', 'sort')
189 189 (list
190 190 (or
191 191 ('symbol', '1')
192 192 ('symbol', '2')
193 193 ('symbol', '3'))
194 194 (negate
195 195 ('symbol', 'rev'))))
196 196 (and
197 197 ('symbol', '4')
198 198 (group
199 199 (or
200 200 ('symbol', '5')
201 201 ('symbol', '6')
202 202 ('symbol', '7'))))
203 203 ('symbol', '8')))
204 204 """
205 205 if not isinstance(tree, tuple):
206 206 return tree
207 207 op = tree[0]
208 208 if op not in targetnodes:
209 209 return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
210 210
211 211 # walk down left nodes taking each right node. no recursion to left nodes
212 212 # because infix operators are left-associative, i.e. left tree is deep.
213 213 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
214 214 simplified = []
215 215 x = tree
216 216 while x[0] == op:
217 217 l, r = x[1:]
218 218 simplified.append(simplifyinfixops(r, targetnodes))
219 219 x = l
220 220 simplified.append(simplifyinfixops(x, targetnodes))
221 221 simplified.append(op)
222 222 return tuple(reversed(simplified))
223 223
224 224 def parseerrordetail(inst):
225 225 """Compose error message from specified ParseError object
226 226 """
227 227 if len(inst.args) > 1:
228 228 return _('at %s: %s') % (inst.args[1], inst.args[0])
229 229 else:
230 230 return inst.args[0]
231 231
232 232 class alias(object):
233 233 """Parsed result of alias"""
234 234
235 def __init__(self, name, tree, args, err, replacement):
235 def __init__(self, name, args, err, replacement):
236 236 self.name = name
237 self.tree = tree
238 237 self.args = args
239 238 self.error = err
240 239 self.replacement = replacement
241 240 # whether own `error` information is already shown or not.
242 241 # this avoids showing same warning multiple times at each
243 242 # `expandaliases`.
244 243 self.warned = False
245 244
246 245 class basealiasrules(object):
247 246 """Parsing and expansion rule set of aliases
248 247
249 248 This is a helper for fileset/revset/template aliases. A concrete rule set
250 249 should be made by sub-classing this and implementing class/static methods.
251 250
252 251 It supports alias expansion of symbol and funciton-call styles::
253 252
254 253 # decl = defn
255 254 h = heads(default)
256 255 b($1) = ancestors($1) - ancestors(default)
257 256 """
258 257 # typically a config section, which will be included in error messages
259 258 _section = None
260 259 # tags of symbol and function nodes
261 260 _symbolnode = 'symbol'
262 261 _funcnode = 'func'
263 262
264 263 def __new__(cls):
265 264 raise TypeError("'%s' is not instantiatable" % cls.__name__)
266 265
267 266 @staticmethod
268 267 def _parse(spec):
269 268 """Parse an alias name, arguments and definition"""
270 269 raise NotImplementedError
271 270
272 271 @staticmethod
273 272 def _getlist(tree):
274 273 """Extract a list of arguments from parsed tree"""
275 274 raise NotImplementedError
276 275
277 276 @classmethod
278 277 def _builddecl(cls, decl):
279 """Parse an alias declaration into ``(name, tree, args, errorstr)``
278 """Parse an alias declaration into ``(name, args, errorstr)``
280 279
281 280 This function analyzes the parsed tree. The parsing rule is provided
282 281 by ``_parse()``.
283 282
284 283 - ``name``: of declared alias (may be ``decl`` itself at error)
285 - ``tree``: parse result (or ``None`` at error)
286 284 - ``args``: list of argument names (or None for symbol declaration)
287 285 - ``errorstr``: detail about detected error (or None)
288 286
289 287 >>> sym = lambda x: ('symbol', x)
290 288 >>> symlist = lambda *xs: ('list',) + tuple(sym(x) for x in xs)
291 289 >>> func = lambda n, a: ('func', sym(n), a)
292 290 >>> parsemap = {
293 291 ... 'foo': sym('foo'),
294 292 ... '$foo': sym('$foo'),
295 293 ... 'foo::bar': ('dagrange', sym('foo'), sym('bar')),
296 294 ... 'foo()': func('foo', None),
297 295 ... '$foo()': func('$foo', None),
298 296 ... 'foo($1, $2)': func('foo', symlist('$1', '$2')),
299 297 ... 'foo(bar_bar, baz.baz)':
300 298 ... func('foo', symlist('bar_bar', 'baz.baz')),
301 299 ... 'foo(bar($1, $2))':
302 300 ... func('foo', func('bar', symlist('$1', '$2'))),
303 301 ... 'foo($1, $2, nested($1, $2))':
304 302 ... func('foo', (symlist('$1', '$2') +
305 303 ... (func('nested', symlist('$1', '$2')),))),
306 304 ... 'foo("bar")': func('foo', ('string', 'bar')),
307 305 ... 'foo($1, $2': error.ParseError('unexpected token: end', 10),
308 306 ... 'foo("bar': error.ParseError('unterminated string', 5),
309 307 ... 'foo($1, $2, $1)': func('foo', symlist('$1', '$2', '$1')),
310 308 ... }
311 309 >>> def parse(expr):
312 310 ... x = parsemap[expr]
313 311 ... if isinstance(x, Exception):
314 312 ... raise x
315 313 ... return x
316 314 >>> def getlist(tree):
317 315 ... if not tree:
318 316 ... return []
319 317 ... if tree[0] == 'list':
320 318 ... return list(tree[1:])
321 319 ... return [tree]
322 320 >>> class aliasrules(basealiasrules):
323 321 ... _parse = staticmethod(parse)
324 322 ... _getlist = staticmethod(getlist)
325 323 >>> builddecl = aliasrules._builddecl
326 324 >>> builddecl('foo')
327 ('foo', ('symbol', 'foo'), None, None)
325 ('foo', None, None)
328 326 >>> builddecl('$foo')
329 ('$foo', None, None, "'$' not for alias arguments")
327 ('$foo', None, "'$' not for alias arguments")
330 328 >>> builddecl('foo::bar')
331 ('foo::bar', None, None, 'invalid format')
329 ('foo::bar', None, 'invalid format')
332 330 >>> builddecl('foo()')
333 ('foo', ('func', ('symbol', 'foo')), [], None)
331 ('foo', [], None)
334 332 >>> builddecl('$foo()')
335 ('$foo()', None, None, "'$' not for alias arguments")
333 ('$foo()', None, "'$' not for alias arguments")
336 334 >>> builddecl('foo($1, $2)')
337 ('foo', ('func', ('symbol', 'foo')), ['$1', '$2'], None)
335 ('foo', ['$1', '$2'], None)
338 336 >>> builddecl('foo(bar_bar, baz.baz)')
339 ('foo', ('func', ('symbol', 'foo')), ['bar_bar', 'baz.baz'], None)
337 ('foo', ['bar_bar', 'baz.baz'], None)
340 338 >>> builddecl('foo($1, $2, nested($1, $2))')
341 ('foo($1, $2, nested($1, $2))', None, None, 'invalid argument list')
339 ('foo($1, $2, nested($1, $2))', None, 'invalid argument list')
342 340 >>> builddecl('foo(bar($1, $2))')
343 ('foo(bar($1, $2))', None, None, 'invalid argument list')
341 ('foo(bar($1, $2))', None, 'invalid argument list')
344 342 >>> builddecl('foo("bar")')
345 ('foo("bar")', None, None, 'invalid argument list')
343 ('foo("bar")', None, 'invalid argument list')
346 344 >>> builddecl('foo($1, $2')
347 ('foo($1, $2', None, None, 'at 10: unexpected token: end')
345 ('foo($1, $2', None, 'at 10: unexpected token: end')
348 346 >>> builddecl('foo("bar')
349 ('foo("bar', None, None, 'at 5: unterminated string')
347 ('foo("bar', None, 'at 5: unterminated string')
350 348 >>> builddecl('foo($1, $2, $1)')
351 ('foo', None, None, 'argument names collide with each other')
349 ('foo', None, 'argument names collide with each other')
352 350 """
353 351 try:
354 352 tree = cls._parse(decl)
355 353 except error.ParseError as inst:
356 return (decl, None, None, parseerrordetail(inst))
354 return (decl, None, parseerrordetail(inst))
357 355
358 356 if tree[0] == cls._symbolnode:
359 357 # "name = ...." style
360 358 name = tree[1]
361 359 if name.startswith('$'):
362 return (decl, None, None, _("'$' not for alias arguments"))
363 return (name, tree, None, None)
360 return (decl, None, _("'$' not for alias arguments"))
361 return (name, None, None)
364 362
365 363 if tree[0] == cls._funcnode and tree[1][0] == cls._symbolnode:
366 364 # "name(arg, ....) = ...." style
367 365 name = tree[1][1]
368 366 if name.startswith('$'):
369 return (decl, None, None, _("'$' not for alias arguments"))
367 return (decl, None, _("'$' not for alias arguments"))
370 368 args = []
371 369 for arg in cls._getlist(tree[2]):
372 370 if arg[0] != cls._symbolnode:
373 return (decl, None, None, _("invalid argument list"))
371 return (decl, None, _("invalid argument list"))
374 372 args.append(arg[1])
375 373 if len(args) != len(set(args)):
376 return (name, None, None,
377 _("argument names collide with each other"))
378 return (name, tree[:2], args, None)
374 return (name, None, _("argument names collide with each other"))
375 return (name, args, None)
379 376
380 return (decl, None, None, _("invalid format"))
377 return (decl, None, _("invalid format"))
381 378
382 379 @classmethod
383 380 def _relabelargs(cls, tree, args):
384 381 """Mark alias arguments as ``_aliasarg``"""
385 382 if not isinstance(tree, tuple):
386 383 return tree
387 384 op = tree[0]
388 385 if op != cls._symbolnode:
389 386 return (op,) + tuple(cls._relabelargs(x, args) for x in tree[1:])
390 387
391 388 assert len(tree) == 2
392 389 sym = tree[1]
393 390 if sym in args:
394 391 op = '_aliasarg'
395 392 elif sym.startswith('$'):
396 393 raise error.ParseError(_("'$' not for alias arguments"))
397 394 return (op, sym)
398 395
399 396 @classmethod
400 397 def _builddefn(cls, defn, args):
401 398 """Parse an alias definition into a tree and marks substitutions
402 399
403 400 This function marks alias argument references as ``_aliasarg``. The
404 401 parsing rule is provided by ``_parse()``.
405 402
406 403 ``args`` is a list of alias argument names, or None if the alias
407 404 is declared as a symbol.
408 405
409 406 >>> parsemap = {
410 407 ... '$1 or foo': ('or', ('symbol', '$1'), ('symbol', 'foo')),
411 408 ... '$1 or $bar': ('or', ('symbol', '$1'), ('symbol', '$bar')),
412 409 ... '$10 or baz': ('or', ('symbol', '$10'), ('symbol', 'baz')),
413 410 ... '"$1" or "foo"': ('or', ('string', '$1'), ('string', 'foo')),
414 411 ... }
415 412 >>> class aliasrules(basealiasrules):
416 413 ... _parse = staticmethod(parsemap.__getitem__)
417 414 ... _getlist = staticmethod(lambda x: [])
418 415 >>> builddefn = aliasrules._builddefn
419 416 >>> def pprint(tree):
420 417 ... print prettyformat(tree, ('_aliasarg', 'string', 'symbol'))
421 418 >>> args = ['$1', '$2', 'foo']
422 419 >>> pprint(builddefn('$1 or foo', args))
423 420 (or
424 421 ('_aliasarg', '$1')
425 422 ('_aliasarg', 'foo'))
426 423 >>> try:
427 424 ... builddefn('$1 or $bar', args)
428 425 ... except error.ParseError as inst:
429 426 ... print parseerrordetail(inst)
430 427 '$' not for alias arguments
431 428 >>> args = ['$1', '$10', 'foo']
432 429 >>> pprint(builddefn('$10 or baz', args))
433 430 (or
434 431 ('_aliasarg', '$10')
435 432 ('symbol', 'baz'))
436 433 >>> pprint(builddefn('"$1" or "foo"', args))
437 434 (or
438 435 ('string', '$1')
439 436 ('string', 'foo'))
440 437 """
441 438 tree = cls._parse(defn)
442 439 if args:
443 440 args = set(args)
444 441 else:
445 442 args = set()
446 443 return cls._relabelargs(tree, args)
447 444
448 445 @classmethod
449 446 def build(cls, decl, defn):
450 447 """Parse an alias declaration and definition into an alias object"""
451 448 repl = efmt = None
452 name, tree, args, err = cls._builddecl(decl)
449 name, args, err = cls._builddecl(decl)
453 450 if err:
454 451 efmt = _('failed to parse the declaration of %(section)s '
455 452 '"%(name)s": %(error)s')
456 453 else:
457 454 try:
458 455 repl = cls._builddefn(defn, args)
459 456 except error.ParseError as inst:
460 457 err = parseerrordetail(inst)
461 458 efmt = _('failed to parse the definition of %(section)s '
462 459 '"%(name)s": %(error)s')
463 460 if err:
464 461 err = efmt % {'section': cls._section, 'name': name, 'error': err}
465 return alias(name, tree, args, err, repl)
462 return alias(name, args, err, repl)
466 463
467 464 @classmethod
468 465 def buildmap(cls, items):
469 466 """Parse a list of alias (name, replacement) pairs into a dict of
470 467 alias objects"""
471 468 aliases = {}
472 469 for decl, defn in items:
473 470 a = cls.build(decl, defn)
474 471 aliases[a.name] = a
475 472 return aliases
476 473
477 474 @classmethod
478 475 def _getalias(cls, aliases, tree):
479 476 """If tree looks like an unexpanded alias, return it. Return None
480 477 otherwise.
481 478 """
482 479 if not isinstance(tree, tuple):
483 480 return None
484 481 if tree[0] == cls._symbolnode:
485 482 name = tree[1]
486 483 a = aliases.get(name)
487 if a and a.args is None and a.tree == tree:
484 if a and a.args is None:
488 485 return a
489 486 if tree[0] == cls._funcnode and tree[1][0] == cls._symbolnode:
490 487 name = tree[1][1]
491 488 a = aliases.get(name)
492 if a and a.args is not None and a.tree == tree[:2]:
489 if a and a.args is not None:
493 490 return a
494 491 return None
495 492
496 493 @classmethod
497 494 def _expandargs(cls, tree, args):
498 495 """Replace _aliasarg instances with the substitution value of the
499 496 same name in args, recursively.
500 497 """
501 498 if not isinstance(tree, tuple):
502 499 return tree
503 500 if tree[0] == '_aliasarg':
504 501 sym = tree[1]
505 502 return args[sym]
506 503 return tuple(cls._expandargs(t, args) for t in tree)
507 504
508 505 @classmethod
509 506 def _expand(cls, aliases, tree, expanding, cache):
510 507 if not isinstance(tree, tuple):
511 508 return tree
512 509 a = cls._getalias(aliases, tree)
513 510 if a is None:
514 511 return tuple(cls._expand(aliases, t, expanding, cache)
515 512 for t in tree)
516 513 if a.error:
517 514 raise error.Abort(a.error)
518 515 if a in expanding:
519 516 raise error.ParseError(_('infinite expansion of %(section)s '
520 517 '"%(name)s" detected')
521 518 % {'section': cls._section, 'name': a.name})
522 519 # get cacheable replacement tree by expanding aliases recursively
523 520 expanding.append(a)
524 521 if a.name not in cache:
525 522 cache[a.name] = cls._expand(aliases, a.replacement, expanding,
526 523 cache)
527 524 result = cache[a.name]
528 525 expanding.pop()
529 526 if a.args is None:
530 527 return result
531 528 # substitute function arguments in replacement tree
532 529 l = cls._getlist(tree[2])
533 530 if len(l) != len(a.args):
534 531 raise error.ParseError(_('invalid number of arguments: %d')
535 532 % len(l))
536 533 l = [cls._expand(aliases, t, [], cache) for t in l]
537 534 return cls._expandargs(result, dict(zip(a.args, l)))
538 535
539 536 @classmethod
540 537 def expand(cls, aliases, tree):
541 538 """Expand aliases in tree, recursively.
542 539
543 540 'aliases' is a dictionary mapping user defined aliases to alias objects.
544 541 """
545 542 return cls._expand(aliases, tree, [], {})
General Comments 0
You need to be logged in to leave comments. Login now