upstream/mercurial-mirror Commit - r25306:c87b0592

parser: add helper to reduce nesting of chained infix operations...

Yuya Nishihara -

r25306:c87b0592 default

parent child

mercurial/parser.py

0 +77 0

              # parser.py - simple top-down operator precedence parser for mercurial
              #
              # Copyright 2010 Matt Mackall <mpm@selenic.com>
              #
              # This software may be used and distributed according to the terms of the
              # GNU General Public License version 2 or any later version.
              # see http://effbot.org/zone/simple-top-down-parsing.htm and
              # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
              # for background
              # takes a tokenizer and elements
              # tokenizer is a callable that returns an iterator of (type, value, pos) tuples
              # elements is a mapping of types to binding strength, prefix and infix actions
              # an action is a tree node name, a binding strength for its operand, and an optional match
              # __call__(program) parses program into a labeled tree
              import error
              from i18n import _
              class parser(object):
                  """Top-down operator precedence parser driven by an element table.

                  tokenizer is called with the program text (plus lookup, when one is
                  passed to parse()) and must return an iterator of (type, value, pos)
                  tokens.

                  elements maps each token type to a tuple of
                  (binding strength, prefix action, infix action[, suffix action]).
                  An action is a tuple (node name[, binding strength[, closing token]]);
                  a false action means the token cannot be used in that position.

                  methods, if given, maps node names to the callables used by eval().
                  """
                  def __init__(self, tokenizer, elements, methods=None):
                      self._tokenizer = tokenizer
                      self._elements = elements
                      self._methods = methods
                      # token under the cursor; filled in once parse() starts advancing
                      self.current = None
                  def _advance(self):
                      'advance the tokenizer'
                      # hand back the token that was current and pull in the next one;
                      # current becomes None once the token iterator is exhausted
                      t = self.current
                      self.current = next(self._iter, None)
                      return t
                  def _match(self, m, pos):
                      'make sure the tokenizer matches an end condition'
                      # pos is unused here: the error reports the position carried by the
                      # offending token itself
                      if self.current[0] != m:
                          raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                                 self.current[2])
                      self._advance()
                  def _parse(self, bind=0):
                      """parse one expression, consuming operators that bind tighter than bind

                      Returns a tree of nested tuples whose first item is the node name.
                      Raises error.ParseError if a token is used where its element entry
                      has no prefix or infix action, or if a closing token is missing.
                      """
                      token, value, pos = self._advance()
                      # handle prefix rules on current token
                      prefix = self._elements[token][1]
                      if not prefix:
                          raise error.ParseError(_("not a prefix: %s") % token, pos)
                      if len(prefix) == 1:
                          # leaf: the node simply wraps the token value
                          expr = (prefix[0], value)
                      else:
                          if len(prefix) > 2 and prefix[2] == self.current[0]:
                              # closing token follows immediately: empty operand
                              self._match(prefix[2], pos)
                              expr = (prefix[0], None)
                          else:
                              expr = (prefix[0], self._parse(prefix[1]))
                              if len(prefix) > 2:
                                  self._match(prefix[2], pos)
                      # gather tokens until we meet a lower binding strength
                      while bind < self._elements[self.current[0]][0]:
                          token, value, pos = self._advance()
                          e = self._elements[token]
                          # check for suffix - next token isn't a valid prefix
                          if len(e) == 4 and not self._elements[self.current[0]][1]:
                              suffix = e[3]
                              expr = (suffix[0], expr)
                          else:
                              # handle infix rules
                              if len(e) < 3 or not e[2]:
                                  raise error.ParseError(_("not an infix: %s") % token, pos)
                              infix = e[2]
                              if len(infix) == 3 and infix[2] == self.current[0]:
                                  # closing token follows immediately: empty right operand
                                  self._match(infix[2], pos)
                                  expr = (infix[0], expr, (None))
                              else:
                                  expr = (infix[0], expr, self._parse(infix[1]))
                                  if len(infix) == 3:
                                      self._match(infix[2], pos)
                      return expr
                  def parse(self, message, lookup=None):
                      """generate a parse tree from a message

                      Returns (tree, pos) where pos is the position carried by the token
                      at which parsing stopped. lookup is forwarded to the tokenizer only
                      when it is true.
                      """
                      if lookup:
                          self._iter = self._tokenizer(message, lookup)
                      else:
                          self._iter = self._tokenizer(message)
                      # prime self.current with the first token
                      self._advance()
                      res = self._parse()
                      token, value, pos = self.current
                      return res, pos
                  def eval(self, tree):
                      'recursively evaluate a parse tree using node methods'
                      # anything that is not a tuple is a plain value and evaluates to itself
                      if not isinstance(tree, tuple):
                          return tree
                      return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
                  def __call__(self, message):
                      """parse a message into a parse tree and evaluate if methods given

                      Without methods this returns the (tree, pos) pair produced by
                      parse(). With methods it returns the result of evaluating the tree.
                      """
                      tree, pos = self.parse(message)
                      if self._methods:
                          # evaluate the tree only: passing the (tree, pos) pair to eval()
                          # would use the whole tree as a method name
                          return self.eval(tree)
                      return tree, pos
              def _prettyformat(tree, leafnodes, level, lines):
                  if not isinstance(tree, tuple) or tree[0] in leafnodes:
                      lines.append((level, str(tree)))
                  else:
                      lines.append((level, '(%s' % tree[0]))
                      for s in tree[1:]:
                          _prettyformat(s, leafnodes, level + 1, lines)
                      lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]
              def prettyformat(tree, leafnodes):
                  """Return tree rendered as a multi-line string, two spaces per level.

                  Nodes whose name is in leafnodes are printed as-is on a single line.
                  """
                  entries = []
                  _prettyformat(tree, leafnodes, 0, entries)
                  rendered = []
                  for depth, text in entries:
                      rendered.append('  ' * depth + text)
                  return '\n'.join(rendered)
+             def simplifyinfixops(tree, targetnodes):
+                 """Flatten chained infix operations to reduce usage of Python stack
+                 tree is a parse tree made of nested tuples whose first item is the
+                 node name; anything that is not a tuple is returned unchanged.
+                 targetnodes is a collection of node names to flatten: a chain such
+                 as (op, (op, a, b), c) becomes (op, a, b, c). Every other node is
+                 rebuilt with its operands simplified recursively. Each target node
+                 is unpacked as exactly two operands (left, right).
+                 >>> def f(tree):
+                 ...     print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
+                 >>> f(('or',
+                 ...     ('or',
+                 ...       ('symbol', '1'),
+                 ...       ('symbol', '2')),
+                 ...     ('symbol', '3')))
+                 (or
+                   ('symbol', '1')
+                   ('symbol', '2')
+                   ('symbol', '3'))
+                 >>> f(('func',
+                 ...     ('symbol', 'p1'),
+                 ...     ('or',
+                 ...       ('or',
+                 ...         ('func',
+                 ...           ('symbol', 'sort'),
+                 ...           ('list',
+                 ...             ('or',
+                 ...               ('or',
+                 ...                 ('symbol', '1'),
+                 ...                 ('symbol', '2')),
+                 ...               ('symbol', '3')),
+                 ...             ('negate',
+                 ...               ('symbol', 'rev')))),
+                 ...         ('and',
+                 ...           ('symbol', '4'),
+                 ...           ('group',
+                 ...             ('or',
+                 ...               ('or',
+                 ...                 ('symbol', '5'),
+                 ...                 ('symbol', '6')),
+                 ...               ('symbol', '7'))))),
+                 ...       ('symbol', '8'))))
+                 (func
+                   ('symbol', 'p1')
+                   (or
+                     (func
+                       ('symbol', 'sort')
+                       (list
+                         (or
+                           ('symbol', '1')
+                           ('symbol', '2')
+                           ('symbol', '3'))
+                         (negate
+                           ('symbol', 'rev'))))
+                     (and
+                       ('symbol', '4')
+                       (group
+                         (or
+                           ('symbol', '5')
+                           ('symbol', '6')
+                           ('symbol', '7'))))
+                     ('symbol', '8')))
+                 """
+                 if not isinstance(tree, tuple):
+                     return tree
+                 op = tree[0]
+                 if op not in targetnodes:
+                     return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
+                 # walk down left nodes taking each right node. no recursion to left nodes
+                 # because infix operators are left-associative, i.e. left tree is deep.
+                 # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
+                 # operands are gathered right-to-left and reversed before returning
+                 simplified = []
+                 x = tree
+                 while x[0] == op:
+                     l, r = x[1:]
+                     simplified.append(simplifyinfixops(r, targetnodes))
+                     x = l
+                 # the loop stopped at the first left operand whose head is not op
+                 simplified.append(simplifyinfixops(x, targetnodes))
+                 # op is appended last so that reversed() puts it at the front
+                 simplified.append(op)
+                 return tuple(reversed(simplified))

tests/test-doctest.py

0 +1 0

              # this is a hack to make sure no escape characters are inserted into the output
              import os, sys
              # remove TERM from the environment if it is set; a no-op otherwise
              os.environ.pop('TERM', None)
              import doctest
              def testmod(name, optionflags=0, testtarget=None):
                  __import__(name)
                  mod = sys.modules[name]
                  if testtarget is not None:
                      mod = getattr(mod, testtarget)
                  doctest.testmod(mod, optionflags=optionflags)
              # run the doctests of every module listed below; each call imports the
              # module by name and hands it to doctest.testmod()
              testmod('mercurial.changelog')
              testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
              testmod('mercurial.dispatch')
              testmod('mercurial.encoding')
              testmod('mercurial.hg')
              testmod('mercurial.hgweb.hgwebdir_mod')
              testmod('mercurial.match')
              testmod('mercurial.minirst')
              testmod('mercurial.patch')
              testmod('mercurial.pathutil')
+             testmod('mercurial.parser')
              testmod('mercurial.revset')
              testmod('mercurial.store')
              testmod('mercurial.subrepo')
              testmod('mercurial.templatefilters')
              testmod('mercurial.ui')
              testmod('mercurial.url')
              testmod('mercurial.util')
              # also test the object stored as the 'platform' attribute of mercurial.util
              testmod('mercurial.util', testtarget='platform')
              testmod('hgext.convert.cvsps')
              testmod('hgext.convert.filemap')
              testmod('hgext.convert.subversion')
              testmod('hgext.mq')

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages