upstream/mercurial-mirror Commit - r25306:c87b0592

parser: add helper to reduce nesting of chained infix operations...

Yuya Nishihara -

r25306:c87b0592 default

parent child

mercurial/parser.py

0 +77 0

             # parser.py - simple top-down operator precedence parser for mercurial
             #
             # Copyright 2010 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             # see http://effbot.org/zone/simple-top-down-parsing.htm and
             # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
             # for background
             # takes a tokenizer and elements
             # tokenizer is an iterator that returns type, value pairs
             # elements is a mapping of types to binding strength, prefix and infix actions
             # an action is a tree node name, a tree label, and an optional match
             # __call__(program) parses program into a labeled tree
             import error
             from i18n import _
             class parser(object):
                 """Simple top-down operator precedence parser.

                 tokenizer is a callable that takes the program text (and, when
                 parse() is given one, a lookup argument) and returns an iterator
                 of (type, value, pos) tokens.

                 elements maps each token type to a sequence whose items are, in
                 order: the binding strength, the prefix action, the infix action
                 and optionally the suffix action.  A prefix or infix action is a
                 sequence of a tree node name, the binding strength used to parse
                 its operand, and an optional token type that must close the
                 construct.  A one-item prefix action yields a (name, value) leaf.
                 Only the node name of a suffix action is used.

                 methods, if given, maps tree node names to callables used by
                 eval().

                 The parser does not guard against the token stream running out,
                 so the tokenizer presumably has to finish with a terminating
                 token type listed in elements with a binding strength of 0 or
                 less.
                 """
                 def __init__(self, tokenizer, elements, methods=None):
                     self._tokenizer = tokenizer
                     self._elements = elements
                     self._methods = methods
                     # look-ahead token; filled in by _advance() once parse() has
                     # created the token iterator
                     self.current = None
                 def _advance(self):
                     """Consume the look-ahead token and fetch the next one.

                     Returns the token that was current before the call.
                     self.current becomes None once the iterator is exhausted.
                     """
                     t = self.current
                     self.current = next(self._iter, None)
                     return t
                 def _match(self, m, pos):
                     """Consume the look-ahead token, which must be of type m.

                     Raises error.ParseError, carrying the position of the
                     look-ahead token, if that token has a different type.
                     pos is currently unused.
                     """
                     if self.current[0] != m:
                         raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                                self.current[2])
                     self._advance()
                 def _parse(self, bind=0):
                     """Parse one expression and return its tree.

                     Infix and suffix operators are folded into the expression
                     for as long as the look-ahead token binds more strongly than
                     bind.  Raises error.ParseError if the first token has no
                     prefix action, if an operator token has no infix action, or
                     if an expected closing token is missing.
                     """
                     token, value, pos = self._advance()
                     # handle prefix rules on current token
                     prefix = self._elements[token][1]
                     if not prefix:
                         raise error.ParseError(_("not a prefix: %s") % token, pos)
                     if len(prefix) == 1:
                         # leaf: the token value is the node's only operand
                         expr = (prefix[0], value)
                     else:
                         if len(prefix) > 2 and prefix[2] == self.current[0]:
                             # the closing token follows immediately: no operand
                             self._match(prefix[2], pos)
                             expr = (prefix[0], None)
                         else:
                             expr = (prefix[0], self._parse(prefix[1]))
                             if len(prefix) > 2:
                                 self._match(prefix[2], pos)
                     # gather tokens until we meet a lower binding strength
                     while bind < self._elements[self.current[0]][0]:
                         token, value, pos = self._advance()
                         e = self._elements[token]
                         # check for suffix - next token isn't a valid prefix
                         if len(e) == 4 and not self._elements[self.current[0]][1]:
                             suffix = e[3]
                             expr = (suffix[0], expr)
                         else:
                             # handle infix rules
                             if len(e) < 3 or not e[2]:
                                 raise error.ParseError(_("not an infix: %s") % token, pos)
                             infix = e[2]
                             if len(infix) == 3 and infix[2] == self.current[0]:
                                 # the closing token follows immediately: the
                                 # right operand is recorded as None
                                 self._match(infix[2], pos)
                                 expr = (infix[0], expr, None)
                             else:
                                 expr = (infix[0], expr, self._parse(infix[1]))
                                 if len(infix) == 3:
                                     self._match(infix[2], pos)
                     return expr
                 def parse(self, message, lookup=None):
                     """Generate a parse tree from a message.

                     lookup, if true, is passed to the tokenizer as a second
                     argument.  Returns a (tree, pos) pair where pos is the
                     position of the token at which parsing stopped.
                     """
                     if lookup:
                         self._iter = self._tokenizer(message, lookup)
                     else:
                         self._iter = self._tokenizer(message)
                     # prime the look-ahead token before parsing starts
                     self._advance()
                     res = self._parse()
                     token, value, pos = self.current
                     return res, pos
                 def eval(self, tree):
                     """Recursively evaluate a parse tree using node methods.

                     Non-tuple values are returned unchanged.  A tuple node is
                     evaluated by calling the method registered under its name
                     with its evaluated operands as positional arguments.
                     """
                     if not isinstance(tree, tuple):
                         return tree
                     return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
                 def __call__(self, message):
                     """Parse a message and evaluate the tree if methods given.

                     Returns the evaluated value of the parse tree when methods
                     were given, otherwise the (tree, pos) pair from parse().
                     """
                     tree, pos = self.parse(message)
                     if self._methods:
                         # evaluate the tree alone: handing the (tree, pos) pair
                         # to eval() would make it look up the whole tree as a
                         # node name
                         return self.eval(tree)
                     return tree, pos
             def _prettyformat(tree, leafnodes, level, lines):
                 if not isinstance(tree, tuple) or tree[0] in leafnodes:
                     lines.append((level, str(tree)))
                 else:
                     lines.append((level, '(%s' % tree[0]))
                     for s in tree[1:]:
                         _prettyformat(s, leafnodes, level + 1, lines)
                     lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
             def prettyformat(tree, leafnodes):
                 """Return tree rendered as text, one node per line.

                 Each nesting level is indented by two spaces; nodes whose name
                 is in leafnodes are printed as-is instead of being expanded.
                 """
                 entries = []
                 _prettyformat(tree, leafnodes, 0, entries)
                 return '\n'.join(['  ' * depth + text for depth, text in entries])
+            def simplifyinfixops(tree, targetnodes):
+                """Flatten chained infix operations to reduce usage of Python stack
+                >>> def f(tree):
+                ...     print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
+                >>> f(('or',
+                ...     ('or',
+                ...       ('symbol', '1'),
+                ...       ('symbol', '2')),
+                ...     ('symbol', '3')))
+                (or
+                  ('symbol', '1')
+                  ('symbol', '2')
+                  ('symbol', '3'))
+                >>> f(('func',
+                ...     ('symbol', 'p1'),
+                ...     ('or',
+                ...       ('or',
+                ...         ('func',
+                ...           ('symbol', 'sort'),
+                ...           ('list',
+                ...             ('or',
+                ...               ('or',
+                ...                 ('symbol', '1'),
+                ...                 ('symbol', '2')),
+                ...               ('symbol', '3')),
+                ...             ('negate',
+                ...               ('symbol', 'rev')))),
+                ...         ('and',
+                ...           ('symbol', '4'),
+                ...           ('group',
+                ...             ('or',
+                ...               ('or',
+                ...                 ('symbol', '5'),
+                ...                 ('symbol', '6')),
+                ...               ('symbol', '7'))))),
+                ...       ('symbol', '8'))))
+                (func
+                  ('symbol', 'p1')
+                  (or
+                    (func
+                      ('symbol', 'sort')
+                      (list
+                        (or
+                          ('symbol', '1')
+                          ('symbol', '2')
+                          ('symbol', '3'))
+                        (negate
+                          ('symbol', 'rev'))))
+                    (and
+                      ('symbol', '4')
+                      (group
+                        (or
+                          ('symbol', '5')
+                          ('symbol', '6')
+                          ('symbol', '7'))))
+                    ('symbol', '8')))
+                """
+                if not isinstance(tree, tuple):
+                    return tree
+                op = tree[0]
+                if op not in targetnodes:
+                    return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
+                # walk down left nodes taking each right node. no recursion to left nodes
+                # because infix operators are left-associative, i.e. left tree is deep.
+                # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
+                simplified = []
+                x = tree
+                while x[0] == op:
+                    l, r = x[1:]
+                    simplified.append(simplifyinfixops(r, targetnodes))
+                    x = l
+                simplified.append(simplifyinfixops(x, targetnodes))
+                simplified.append(op)
+                return tuple(reversed(simplified))

tests/test-doctest.py

0 +1 0

             # this is a hack to make sure no escape characters are inserted into the output
             import os, sys
             # drop TERM (if set) before doctest is imported
             os.environ.pop('TERM', None)
             import doctest
             def testmod(name, optionflags=0, testtarget=None):
                 __import__(name)
                 mod = sys.modules[name]
                 if testtarget is not None:
                     mod = getattr(mod, testtarget)
                 doctest.testmod(mod, optionflags=optionflags)
             # (module name, extra keyword arguments for testmod), in run order
             _doctesttargets = [
                 ('mercurial.changelog', {}),
                 ('mercurial.dagparser', {'optionflags': doctest.NORMALIZE_WHITESPACE}),
                 ('mercurial.dispatch', {}),
                 ('mercurial.encoding', {}),
                 ('mercurial.hg', {}),
                 ('mercurial.hgweb.hgwebdir_mod', {}),
                 ('mercurial.match', {}),
                 ('mercurial.minirst', {}),
                 ('mercurial.patch', {}),
                 ('mercurial.pathutil', {}),
                 ('mercurial.parser', {}),
                 ('mercurial.revset', {}),
                 ('mercurial.store', {}),
                 ('mercurial.subrepo', {}),
                 ('mercurial.templatefilters', {}),
                 ('mercurial.ui', {}),
                 ('mercurial.url', {}),
                 ('mercurial.util', {}),
                 ('mercurial.util', {'testtarget': 'platform'}),
                 ('hgext.convert.cvsps', {}),
                 ('hgext.convert.filemap', {}),
                 ('hgext.convert.subversion', {}),
                 ('hgext.mq', {}),
             ]
             for _modname, _extra in _doctesttargets:
                 testmod(_modname, **_extra)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages