upstream/mercurial-mirror Files · mercurial/parser.py

convert: add config option for disabling ancestor parent checks...

convert: add config option for disabling ancestor parent checks When converting merge commits, convert checks if any of the parents are ancestors of any of the other parents. To do this, it builds an ancestor list for every commit in the repository. On large repos this can take a long time (30min+). Let's add an option for disabling this check to preserve performance. The downside of this is that it may create unnecessary parent connections when enabled (which is unfortunate, but not incorrect). To verify, I ran the convert tests with the flag enabled, and verified the graph changes were all just to add new parents that were ancestors of existing parents.

Yuya Nishihara - - Load All Authors

File last commit:

r25705:48919d24 default


                r25742:d859123e

default

Download file

             parser.py
        
                    216 lines
            
             | 7.6 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / parser.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Matt Mackall
    
revset: introduce basic parser

              r11274
            
      # parser.py - simple top-down operator precedence parser for mercurial

      #

      # Copyright 2010 Matt Mackall <mpm@selenic.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

        Julian Cowley
    
parser: fix URL to effbot

              r11449
            
      # see http://effbot.org/zone/simple-top-down-parsing.htm and

        Matt Mackall
    
revset: introduce basic parser

              r11274
            
      # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/

      # for background

      # takes a tokenizer and elements

        Yuya Nishihara
    
parser: update documentation about tokenizer and elements

              r25655
            
      # tokenizer is an iterator that returns (type, value, pos) tuples

      # elements is a mapping of types to binding strength, prefix, infix and

      # optional suffix actions

        Matt Mackall
    
revset: introduce basic parser

              r11274
            
      # an action is a tree node name, a tree label, and an optional match

        timeless@mozdev.org
    
en-us: labeled

              r17500
            
      # __call__(tokeniter) parses an iterator of tokens into a labeled tree

        Matt Mackall
    
revset: introduce basic parser

              r11274
            
        Matt Mackall
    
revset: raise ParseError exceptions

              r11289
            
      import error

        Mads Kiilerich
    
parsers: fix localization markup of parser errors

              r14701
            
      from i18n import _

        Matt Mackall
    
revset: raise ParseError exceptions

              r11289
            
        Matt Mackall
    
revset: introduce basic parser

              r11274
            
      class parser(object):
          """Simple top-down operator precedence parser

          ``elements`` maps a token type to a tuple of
          (binding strength, prefix action, infix action[, suffix action]).
          An action is either false (not allowed in that position) or a tuple
          of (tree node name[, binding strength used to parse the operand[,
          token type that must close the operand]]).

          ``methods``, if given, maps a tree node name to a callable that
          eval() applies to the evaluated children of that node.
          """

          def __init__(self, elements, methods=None):
              self._elements = elements
              self._methods = methods
              self.current = None

          def _advance(self):
              """Step to the next token and return the one that was current

              ``self.current`` becomes None once the token iterator is
              exhausted.
              """
              t = self.current
              self.current = next(self._iter, None)
              return t

          def _match(self, m, pos):
              """Consume the current token, which must be of type ``m``

              Raises error.ParseError, reported at the position of the current
              token, if the type differs.  ``pos`` is accepted for the callers'
              convenience but is not used.
              """
              if self.current[0] != m:
                  raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                         self.current[2])
              self._advance()

          def _parse(self, bind=0):
              """Parse one expression and return it as a tree of tuples

              Tokens keep being folded into the expression for as long as the
              upcoming token binds more strongly than ``bind``.
              """
              token, value, pos = self._advance()
              # handle prefix rules on current token
              prefix = self._elements[token][1]
              if not prefix:
                  raise error.ParseError(_("not a prefix: %s") % token, pos)
              if len(prefix) == 1:
                  # leaf: the node simply carries the token's value
                  expr = (prefix[0], value)
              else:
                  if len(prefix) > 2 and prefix[2] == self.current[0]:
                      # closing token follows immediately: empty operand
                      self._match(prefix[2], pos)
                      expr = (prefix[0], None)
                  else:
                      expr = (prefix[0], self._parse(prefix[1]))
                      if len(prefix) > 2:
                          self._match(prefix[2], pos)
              # gather tokens until we meet a lower binding strength
              while bind < self._elements[self.current[0]][0]:
                  token, value, pos = self._advance()
                  e = self._elements[token]
                  # check for suffix - next token isn't a valid prefix
                  if len(e) == 4 and not self._elements[self.current[0]][1]:
                      suffix = e[3]
                      expr = (suffix[0], expr)
                  else:
                      # handle infix rules
                      if len(e) < 3 or not e[2]:
                          raise error.ParseError(_("not an infix: %s") % token, pos)
                      infix = e[2]
                      if len(infix) == 3 and infix[2] == self.current[0]:
                          # closing token follows immediately: empty right side
                          self._match(infix[2], pos)
                          expr = (infix[0], expr, None)
                      else:
                          expr = (infix[0], expr, self._parse(infix[1]))
                          if len(infix) == 3:
                              self._match(infix[2], pos)
              return expr

          def parse(self, tokeniter):
              """Generate a parse tree from tokens

              ``tokeniter`` yields (type, value, pos) tuples.  Returns a pair
              of (tree, pos) where pos is the position of the first token that
              was not consumed, so that callers can detect trailing input.
              """
              self._iter = tokeniter
              # prime self.current with the first token
              self._advance()
              res = self._parse()
              token, value, pos = self.current
              return res, pos

          def eval(self, tree):
              """Recursively evaluate a parse tree using node methods

              Non-tuple values are returned as is; a tuple node is replaced by
              the result of calling the method registered under its name with
              its evaluated children as arguments.
              """
              if not isinstance(tree, tuple):
                  return tree
              return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])

          def __call__(self, tokeniter):
              """Parse tokens into a parse tree and evaluate if methods given

              Without methods, the (tree, pos) pair from parse() is returned
              unchanged.  With methods, only the tree is evaluated: parse()
              returns a pair, and feeding that pair to eval() would treat the
              tree itself as a node name.
              """
              tree, pos = self.parse(tokeniter)
              if self._methods:
                  return self.eval(tree)
              return tree, pos

        Yuya Nishihara
    
parser: move prettyformat() function from revset module...

              r25253
            
        Yuya Nishihara
    
revset: add function to build dict of positional and keyword arguments...

              r25705
            
      def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
          """Map a list of argument trees onto the names listed in ``keys``

          Leading trees are taken as positional arguments and paired with
          ``keys`` in order; from the first ``keyvaluenode`` tree onwards every
          tree must be a keyword argument whose name is a ``keynode`` tree.

          Too many arguments, unknown keywords and keywords given twice raise
          error.ParseError.  Arguments that are not supplied are simply absent
          from the returned dict.
          """
          if len(trees) > len(keys):
              raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
                                     % {'func': funcname, 'nargs': len(keys)})

          # positional part: everything in front of the first key-value node
          npositional = 0
          while (npositional < len(trees)
                 and trees[npositional][0] != keyvaluenode):
              npositional += 1
          args = dict(zip(keys, trees[:npositional]))

          # whatever follows has to be spelled as key=value
          for node in trees[len(args):]:
              iskeyword = node[0] == keyvaluenode and node[1][0] == keynode
              if not iskeyword:
                  raise error.ParseError(_("%(func)s got an invalid argument")
                                         % {'func': funcname})
              name = node[1][1]
              if name not in keys:
                  raise error.ParseError(_("%(func)s got an unexpected keyword "
                                           "argument '%(key)s'")
                                         % {'func': funcname, 'key': name})
              if name in args:
                  raise error.ParseError(_("%(func)s got multiple values for keyword "
                                           "argument '%(key)s'")
                                         % {'func': funcname, 'key': name})
              args[name] = node[2]
          return args

        Yuya Nishihara
    
parser: extract closure of prettyformat() to a top-level function...

              r25254
            
      def _prettyformat(tree, leafnodes, level, lines):

          if not isinstance(tree, tuple) or tree[0] in leafnodes:

              lines.append((level, str(tree)))

          else:

              lines.append((level, '(%s' % tree[0]))

              for s in tree[1:]:

                  _prettyformat(s, leafnodes, level + 1, lines)

              lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]

        Yuya Nishihara
    
parser: move prettyformat() function from revset module...

              r25253
            
      def prettyformat(tree, leafnodes):
          """Return ``tree`` as a multi-line string, one node per line

          Each nesting level is indented by two spaces.  Nodes whose name is
          in ``leafnodes`` are printed whole instead of being expanded.
          """
          entries = []
          _prettyformat(tree, leafnodes, 0, entries)
          rendered = [('  ' * depth + text) for depth, text in entries]
          return '\n'.join(rendered)

        Yuya Nishihara
    
parser: add helper to reduce nesting of chained infix operations...

              r25306
            
      def simplifyinfixops(tree, targetnodes):
          """Flatten chained infix operations to reduce usage of Python stack

          ``tree`` is a parse tree made of (name, child, ...) tuples and
          ``targetnodes`` lists the names of the binary, left-associative
          operators to flatten.  A left-deep chain of one such operator is
          rewritten into a single node holding all operands in their original
          order; every other node is rebuilt with its children simplified.
          Non-tuple values are returned unchanged wherever they appear,
          including as the leftmost operand of a chain.

          >>> def f(tree):
          ...     print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
          >>> f(('or',
          ...     ('or',
          ...       ('symbol', '1'),
          ...       ('symbol', '2')),
          ...     ('symbol', '3')))
          (or
            ('symbol', '1')
            ('symbol', '2')
            ('symbol', '3'))
          >>> f(('func',
          ...     ('symbol', 'p1'),
          ...     ('or',
          ...       ('or',
          ...         ('func',
          ...           ('symbol', 'sort'),
          ...           ('list',
          ...             ('or',
          ...               ('or',
          ...                 ('symbol', '1'),
          ...                 ('symbol', '2')),
          ...               ('symbol', '3')),
          ...             ('negate',
          ...               ('symbol', 'rev')))),
          ...         ('and',
          ...           ('symbol', '4'),
          ...           ('group',
          ...             ('or',
          ...               ('or',
          ...                 ('symbol', '5'),
          ...                 ('symbol', '6')),
          ...               ('symbol', '7'))))),
          ...       ('symbol', '8'))))
          (func
            ('symbol', 'p1')
            (or
              (func
                ('symbol', 'sort')
                (list
                  (or
                    ('symbol', '1')
                    ('symbol', '2')
                    ('symbol', '3'))
                  (negate
                    ('symbol', 'rev'))))
              (and
                ('symbol', '4')
                (group
                  (or
                    ('symbol', '5')
                    ('symbol', '6')
                    ('symbol', '7'))))
              ('symbol', '8')))
          >>> simplifyinfixops(('or', None, ('symbol', '1')), ('or',))
          ('or', None, ('symbol', '1'))
          """
          if not isinstance(tree, tuple):
              return tree
          op = tree[0]
          if op not in targetnodes:
              return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])

          # walk down left nodes taking each right node. no recursion to left nodes
          # because infix operators are left-associative, i.e. left tree is deep.
          # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
          simplified = []
          x = tree
          # the isinstance() test stops the walk at a non-tuple left operand
          # (e.g. None) instead of trying to index it
          while isinstance(x, tuple) and x[0] == op:
              l, r = x[1:]
              simplified.append(simplifyinfixops(r, targetnodes))
              x = l
          simplified.append(simplifyinfixops(x, targetnodes))
          simplified.append(op)
          # operands were collected right-to-left with op last; restore order
          return tuple(reversed(simplified))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Matt Mackall revset: introduce basic parser	r11274	# parser.py - simple top-down operator precedence parser for mercurial
		#
		# Copyright 2010 Matt Mackall <mpm@selenic.com>
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.

Julian Cowley parser: fix URL to effbot	r11449	# see http://effbot.org/zone/simple-top-down-parsing.htm and
Matt Mackall revset: introduce basic parser	r11274	# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
		# for background

		# takes a tokenizer and elements
Yuya Nishihara parser: update documentation about tokenizer and elements	r25655	# tokenizer is an iterator that returns (type, value, pos) tuples
		# elements is a mapping of types to binding strength, prefix, infix and
		# optional suffix actions
Matt Mackall revset: introduce basic parser	r11274	# an action is a tree node name, a tree label, and an optional match
timeless@mozdev.org en-us: labeled	r17500	# __call__(program) parses program into a labeled tree
Matt Mackall revset: introduce basic parser	r11274
Matt Mackall revset: raise ParseError exceptions	r11289	import error
Mads Kiilerich parsers: fix localization markup of parser errors	r14701	from i18n import _
Matt Mackall revset: raise ParseError exceptions	r11289
Matt Mackall revset: introduce basic parser	r11274	class parser(object):
Yuya Nishihara parser: accept iterator of tokens instead of tokenizer function and program...	r25654	def __init__(self, elements, methods=None):
Matt Mackall revset: introduce basic parser	r11274	self._elements = elements
		self._methods = methods
Matt Mackall templater: use the parser.py parser to extend the templater syntax	r13176	self.current = None
Matt Mackall revset: introduce basic parser	r11274	def _advance(self):
		'advance the tokenizer'
		t = self.current
Pierre-Yves David parsers: use 'next' instead of try/except...	r25171	self.current = next(self._iter, None)
Matt Mackall revset: introduce basic parser	r11274	return t
Peter Arrenbrecht parser: fix missing param in _match	r11319	def _match(self, m, pos):
Matt Mackall revset: introduce basic parser	r11274	'make sure the tokenizer matches an end condition'
		if self.current[0] != m:
Mads Kiilerich parsers: fix localization markup of parser errors	r14701	raise error.ParseError(_("unexpected token: %s") % self.current[0],
Dirkjan Ochtman cleanups: undefined variables	r11305	self.current[2])
Matt Mackall revset: introduce basic parser	r11274	self._advance()
		def _parse(self, bind=0):
Matt Mackall revset: raise ParseError exceptions	r11289	token, value, pos = self._advance()
Matt Mackall revset: introduce basic parser	r11274	# handle prefix rules on current token
		prefix = self._elements[token][1]
		if not prefix:
Mads Kiilerich parsers: fix localization markup of parser errors	r14701	raise error.ParseError(_("not a prefix: %s") % token, pos)
Matt Mackall revset: introduce basic parser	r11274	if len(prefix) == 1:
		expr = (prefix[0], value)
		else:
		if len(prefix) > 2 and prefix[2] == self.current[0]:
Peter Arrenbrecht parser: fix missing param in _match	r11319	self._match(prefix[2], pos)
Matt Mackall revset: introduce basic parser	r11274	expr = (prefix[0], None)
		else:
		expr = (prefix[0], self._parse(prefix[1]))
		if len(prefix) > 2:
Peter Arrenbrecht parser: fix missing param in _match	r11319	self._match(prefix[2], pos)
Matt Mackall revset: introduce basic parser	r11274	# gather tokens until we meet a lower binding strength
		while bind < self._elements[self.current[0]][0]:
Matt Mackall revset: raise ParseError exceptions	r11289	token, value, pos = self._advance()
Matt Mackall revset: add support for prefix and suffix versions of : and ::	r11278	e = self._elements[token]
		# check for suffix - next token isn't a valid prefix
		if len(e) == 4 and not self._elements[self.current[0]][1]:
		suffix = e[3]
		expr = (suffix[0], expr)
Matt Mackall revset: introduce basic parser	r11274	else:
Matt Mackall revset: add support for prefix and suffix versions of : and ::	r11278	# handle infix rules
Matt Mackall parser: improve infix error checking...	r11412	if len(e) < 3 or not e[2]:
Mads Kiilerich parsers: fix localization markup of parser errors	r14701	raise error.ParseError(_("not an infix: %s") % token, pos)
Matt Mackall parser: improve infix error checking...	r11412	infix = e[2]
Matt Mackall revset: add support for prefix and suffix versions of : and ::	r11278	if len(infix) == 3 and infix[2] == self.current[0]:
Peter Arrenbrecht parser: fix missing param in _match	r11319	self._match(infix[2], pos)
Matt Mackall revset: add support for prefix and suffix versions of : and ::	r11278	expr = (infix[0], expr, (None))
		else:
		expr = (infix[0], expr, self._parse(infix[1]))
		if len(infix) == 3:
Peter Arrenbrecht parser: fix missing param in _match	r11319	self._match(infix[2], pos)
Matt Mackall revset: introduce basic parser	r11274	return expr
Yuya Nishihara parser: accept iterator of tokens instead of tokenizer function and program...	r25654	def parse(self, tokeniter):
		'generate a parse tree from tokens'
		self._iter = tokeniter
Matt Mackall templater: use the parser.py parser to extend the templater syntax	r13176	self._advance()
Bernhard Leiner revset: report a parse error if a revset is not parsed completely (issue2654)	r13665	res = self._parse()
		token, value, pos = self.current
		return res, pos
Matt Mackall revset: introduce basic parser	r11274	def eval(self, tree):
		'recursively evaluate a parse tree using node methods'
		if not isinstance(tree, tuple):
		return tree
		return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
Yuya Nishihara parser: accept iterator of tokens instead of tokenizer function and program...	r25654	def __call__(self, tokeniter):
		'parse tokens into a parse tree and evaluate if methods given'
		t = self.parse(tokeniter)
Matt Mackall revset: introduce basic parser	r11274	if self._methods:
		return self.eval(t)
		return t
Yuya Nishihara parser: move prettyformat() function from revset module...	r25253
Yuya Nishihara revset: add function to build dict of positional and keyword arguments...	r25705	def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
		"""Build dict from list containing positional and keyword arguments

		Invalid keywords or too many positional arguments are rejected, but
		missing arguments are just omitted.
		"""
		if len(trees) > len(keys):
		raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
		% {'func': funcname, 'nargs': len(keys)})
		args = {}
		# consume positional arguments
		for k, x in zip(keys, trees):
		if x[0] == keyvaluenode:
		break
		args[k] = x
		# remainder should be keyword arguments
		for x in trees[len(args):]:
		if x[0] != keyvaluenode or x[1][0] != keynode:
		raise error.ParseError(_("%(func)s got an invalid argument")
		% {'func': funcname})
		k = x[1][1]
		if k not in keys:
		raise error.ParseError(_("%(func)s got an unexpected keyword "
		"argument '%(key)s'")
		% {'func': funcname, 'key': k})
		if k in args:
		raise error.ParseError(_("%(func)s got multiple values for keyword "
		"argument '%(key)s'")
		% {'func': funcname, 'key': k})
		args[k] = x[2]
		return args

Yuya Nishihara parser: extract closure of prettyformat() to a top-level function...	r25254	def _prettyformat(tree, leafnodes, level, lines):
		if not isinstance(tree, tuple) or tree[0] in leafnodes:
		lines.append((level, str(tree)))
		else:
		lines.append((level, '(%s' % tree[0]))
		for s in tree[1:]:
		_prettyformat(s, leafnodes, level + 1, lines)
		lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]

Yuya Nishihara parser: move prettyformat() function from revset module...	r25253	def prettyformat(tree, leafnodes):
		lines = []
Yuya Nishihara parser: extract closure of prettyformat() to a top-level function...	r25254	_prettyformat(tree, leafnodes, 0, lines)
Yuya Nishihara parser: move prettyformat() function from revset module...	r25253	output = '\n'.join((' ' * l + s) for l, s in lines)
		return output
Yuya Nishihara parser: add helper to reduce nesting of chained infix operations...	r25306
		def simplifyinfixops(tree, targetnodes):
		"""Flatten chained infix operations to reduce usage of Python stack

		>>> def f(tree):
		... print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
		>>> f(('or',
		... ('or',
		... ('symbol', '1'),
		... ('symbol', '2')),
		... ('symbol', '3')))
		(or
		('symbol', '1')
		('symbol', '2')
		('symbol', '3'))
		>>> f(('func',
		... ('symbol', 'p1'),
		... ('or',
		... ('or',
		... ('func',
		... ('symbol', 'sort'),
		... ('list',
		... ('or',
		... ('or',
		... ('symbol', '1'),
		... ('symbol', '2')),
		... ('symbol', '3')),
		... ('negate',
		... ('symbol', 'rev')))),
		... ('and',
		... ('symbol', '4'),
		... ('group',
		... ('or',
		... ('or',
		... ('symbol', '5'),
		... ('symbol', '6')),
		... ('symbol', '7'))))),
		... ('symbol', '8'))))
		(func
		('symbol', 'p1')
		(or
		(func
		('symbol', 'sort')
		(list
		(or
		('symbol', '1')
		('symbol', '2')
		('symbol', '3'))
		(negate
		('symbol', 'rev'))))
		(and
		('symbol', '4')
		(group
		(or
		('symbol', '5')
		('symbol', '6')
		('symbol', '7'))))
		('symbol', '8')))
		"""
		if not isinstance(tree, tuple):
		return tree
		op = tree[0]
		if op not in targetnodes:
		return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])

		# walk down left nodes taking each right node. no recursion to left nodes
		# because infix operators are left-associative, i.e. left tree is deep.
		# e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
		simplified = []
		x = tree
		while x[0] == op:
		l, r = x[1:]
		simplified.append(simplifyinfixops(r, targetnodes))
		x = l
		simplified.append(simplifyinfixops(x, targetnodes))
		simplified.append(op)
		return tuple(reversed(simplified))