# parser.py - simple top-down operator precedence parser for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# see http://effbot.org/zone/simple-top-down-parsing.htm and
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
# for background

# takes a tokenizer and elements
# tokenizer is an iterator that returns (type, value, pos) tuples
# elements is a mapping of types to binding strength, primary, prefix, infix
# and suffix actions
# an action is a tree node name, a tree label, and an optional match
# __call__(program) parses program into a labeled tree

from __future__ import absolute_import

from .i18n import _
from . import error
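
# For illustration only (a hypothetical table, not used anywhere in this
# file): an elements entry maps a token type to a 5-tuple of
# (binding strength, primary, prefix, infix, suffix), e.g.
#
#   elements = {
#       'symbol': (0, 'symbol', None, None, None),                # primary leaf
#       '-': (5, None, ('negate', 19), ('minus', 5), None),       # prefix + infix
#       'end': (0, None, None, None, None),                       # terminator
#   }
#
# where an action such as ('minus', 5) names the tree node to build and the
# binding strength used to parse its right-hand operand.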

class parser(object):
    def __init__(self, elements, methods=None):
        self._elements = elements
        self._methods = methods
        self.current = None
    def _advance(self):
        'advance the tokenizer'
        t = self.current
        self.current = next(self._iter, None)
        return t
    def _hasnewterm(self):
        'True if next token may start new term'
        return any(self._elements[self.current[0]][1:3])
    def _match(self, m):
        'make sure the tokenizer matches an end condition'
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()
    def _parseoperand(self, bind, m=None):
        'gather right-hand-side operand until an end condition or binding met'
        if m and self.current[0] == m:
            expr = None
        else:
            expr = self._parse(bind)
        if m:
            self._match(m)
        return expr
    def _parse(self, bind=0):
        token, value, pos = self._advance()
        # handle prefix rules on current token, take as primary if unambiguous
        primary, prefix = self._elements[token][1:3]
        if primary and not (prefix and self._hasnewterm()):
            expr = (primary, value)
        elif prefix:
            expr = (prefix[0], self._parseoperand(*prefix[1:]))
        else:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            # handle infix rules, take as suffix if unambiguous
            infix, suffix = self._elements[token][3:]
            if suffix and not (infix and self._hasnewterm()):
                expr = (suffix[0], expr)
            elif infix:
                expr = (infix[0], expr, self._parseoperand(*infix[1:]))
            else:
                raise error.ParseError(_("not an infix: %s") % token, pos)
        return expr
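    # Illustration (hypothetical element table): if '-' defines both a prefix
    # action ('negate', ...) and an infix action ('minus', ...), then "-2"
    # takes the prefix branch because '-' opens the expression, while "1 - 2"
    # reaches '-' inside the while loop above, after ('symbol', '1') has
    # become the left operand, and takes the infix branch.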
    def parse(self, tokeniter):
        'generate a parse tree from tokens'
        self._iter = tokeniter
        self._advance()
        res = self._parse()
        token, value, pos = self.current
        return res, pos
    def eval(self, tree):
        'recursively evaluate a parse tree using node methods'
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
    def __call__(self, tokeniter):
        'parse tokens into a parse tree and evaluate if methods given'
        t = self.parse(tokeniter)
        if self._methods:
            return self.eval(t)
        return t
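
# A minimal usage sketch, for illustration only: the element table, token
# stream and node names below are hypothetical, chosen just to show how
# binding strengths drive precedence.  Tokens follow the (type, value, pos)
# convention documented at the top of this file.
def _exampleparse():
    elements = {
        # token type: (binding strength, primary, prefix, infix, suffix)
        'symbol': (0, 'symbol', None, None, None),
        '+': (1, None, None, ('add', 1), None),
        '*': (2, None, None, ('mul', 2), None),
        'end': (0, None, None, None, None),
    }
    # token stream for "1 + 2 * 3"; the trailing 'end' token (strength 0)
    # terminates the while loop in _parse
    tokens = iter([('symbol', '1', 0), ('+', None, 2), ('symbol', '2', 4),
                   ('*', None, 6), ('symbol', '3', 8), ('end', None, 9)])
    tree, pos = parser(elements).parse(tokens)
    # tree == ('add', ('symbol', '1'), ('mul', ('symbol', '2'), ('symbol', '3')))
    # pos == 9, the position of the unconsumed 'end' token
    return tree, pos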

def buildargsdict(trees, funcname, keys, keyvaluenode, keynode):
    """Build dict from list containing positional and keyword arguments

    Invalid keywords or too many positional arguments are rejected, but
    missing arguments are just omitted.
    """
    if len(trees) > len(keys):
        raise error.ParseError(_("%(func)s takes at most %(nargs)d arguments")
                               % {'func': funcname, 'nargs': len(keys)})
    args = {}
    # consume positional arguments
    for k, x in zip(keys, trees):
        if x[0] == keyvaluenode:
            break
        args[k] = x
    # remainder should be keyword arguments
    for x in trees[len(args):]:
        if x[0] != keyvaluenode or x[1][0] != keynode:
            raise error.ParseError(_("%(func)s got an invalid argument")
                                   % {'func': funcname})
        k = x[1][1]
        if k not in keys:
            raise error.ParseError(_("%(func)s got an unexpected keyword "
                                     "argument '%(key)s'")
                                   % {'func': funcname, 'key': k})
        if k in args:
            raise error.ParseError(_("%(func)s got multiple values for keyword "
                                     "argument '%(key)s'")
                                   % {'func': funcname, 'key': k})
        args[k] = x[2]
    return args
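
# For illustration (a hypothetical call; 'keyvalue', 'symbol' and 'follow'
# stand in for whatever node and function names the caller's grammar uses):
#
#   buildargsdict([('symbol', '1'),
#                  ('keyvalue', ('symbol', 'depth'), ('symbol', '2'))],
#                 'follow', ['file', 'depth'], 'keyvalue', 'symbol')
#   -> {'file': ('symbol', '1'), 'depth': ('symbol', '2')}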

def unescapestr(s):
    try:
        return s.decode("string_escape")
    except ValueError as e:
        # mangle Python's exception into our format
        raise error.ParseError(str(e).lower())
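# e.g. unescapestr('a\\nb') -> 'a\nb'; decoding failures surface as ParseError
# rather than Python's ValueError (illustrative values only).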

def _prettyformat(tree, leafnodes, level, lines):
    if not isinstance(tree, tuple) or tree[0] in leafnodes:
        lines.append((level, str(tree)))
    else:
        lines.append((level, '(%s' % tree[0]))
        for s in tree[1:]:
            _prettyformat(s, leafnodes, level + 1, lines)
        lines[-1:] = [(lines[-1][0], lines[-1][1] + ')')]

def prettyformat(tree, leafnodes):
    lines = []
    _prettyformat(tree, leafnodes, 0, lines)
    output = '\n'.join((' ' * l + s) for l, s in lines)
    return output

def simplifyinfixops(tree, targetnodes):
    """Flatten chained infix operations to reduce usage of Python stack

    >>> def f(tree):
    ...     print prettyformat(simplifyinfixops(tree, ('or',)), ('symbol',))
    >>> f(('or',
    ...     ('or',
    ...       ('symbol', '1'),
    ...       ('symbol', '2')),
    ...     ('symbol', '3')))
    (or
      ('symbol', '1')
      ('symbol', '2')
      ('symbol', '3'))
    >>> f(('func',
    ...     ('symbol', 'p1'),
    ...     ('or',
    ...       ('or',
    ...         ('func',
    ...           ('symbol', 'sort'),
    ...           ('list',
    ...             ('or',
    ...               ('or',
    ...                 ('symbol', '1'),
    ...                 ('symbol', '2')),
    ...               ('symbol', '3')),
    ...             ('negate',
    ...               ('symbol', 'rev')))),
    ...         ('and',
    ...           ('symbol', '4'),
    ...           ('group',
    ...             ('or',
    ...               ('or',
    ...                 ('symbol', '5'),
    ...                 ('symbol', '6')),
    ...               ('symbol', '7'))))),
    ...       ('symbol', '8'))))
    (func
      ('symbol', 'p1')
      (or
        (func
          ('symbol', 'sort')
          (list
            (or
              ('symbol', '1')
              ('symbol', '2')
              ('symbol', '3'))
            (negate
              ('symbol', 'rev'))))
        (and
          ('symbol', '4')
          (group
            (or
              ('symbol', '5')
              ('symbol', '6')
              ('symbol', '7'))))
        ('symbol', '8')))
    """
    if not isinstance(tree, tuple):
        return tree
    op = tree[0]
    if op not in targetnodes:
        return (op,) + tuple(simplifyinfixops(x, targetnodes) for x in tree[1:])
    # walk down left nodes taking each right node. no recursion to left nodes
    # because infix operators are left-associative, i.e. left tree is deep.
    # e.g. '1 + 2 + 3' -> (+ (+ 1 2) 3) -> (+ 1 2 3)
    simplified = []
    x = tree
    while x[0] == op:
        l, r = x[1:]
        simplified.append(simplifyinfixops(r, targetnodes))
        x = l
    simplified.append(simplifyinfixops(x, targetnodes))
    simplified.append(op)
    return tuple(reversed(simplified))