upstream/mercurial-mirror Files · mercurial/minirst.py

tests: Don't use $PWD...

tests: Don't use $PWD $PWD doesn't work with solaris sh - use `pwd` instead

Matt Mackall - - Load All Authors

File last commit:

r10264:d6512b3e default


                r10275:3a1f29b6

stable

Download file

             minirst.py
        
                    279 lines
            
             | 9.3 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / minirst.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      # minirst.py - minimal reStructuredText parser

      #

      # Copyright 2009 Matt Mackall <mpm@selenic.com> and others

      #

      # This software may be used and distributed according to the terms of the

        Matt Mackall
    
Update license to GPLv2+

              r10263
            
      # GNU General Public License version 2 or any later version.

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      """simplified reStructuredText parser.

      This parser knows just enough about reStructuredText to parse the

      Mercurial docstrings.

      It cheats in a major way: nested blocks are not really nested. They

      are just indented blocks that look like they are nested. This relies

      on the user to keep the right indentation for the blocks.

      It only supports a small subset of reStructuredText:

        Martin Geisler
    
minirst: update module docstring

              r9741
            
      - sections

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      - paragraphs

        Martin Geisler
    
minirst: update module docstring

              r9741
            
      - literal blocks

      - definition lists

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
        Martin Geisler
    
minirst: update module docstring

              r9741
            
      - bullet lists (items must start with '-')

      - enumerated lists (no autonumbering)

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
        Martin Geisler
    
minirst: parse field lists

              r9293
            
      - field lists (colons cannot be escaped)

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      - option lists (supports only long options without arguments)

        Martin Geisler
    
minirst: update module docstring

              r9741
            
      - inline literals (no other inline markup is recognized)

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      """

      import re, sys, textwrap

      def findblocks(text):
          """Split text into blocks of consecutive non-blank lines.

          Blank (or whitespace-only) lines separate blocks and are dropped.
          Returns a list of dictionaries, one per block, each with:

          - 'indent': the smallest leading-whitespace width among its lines
          - 'lines': the lines of the block with that common indent removed
          """
          groups = []
          current = []
          for line in text.splitlines():
              if line.strip():
                  current.append(line)
              elif current:
                  # A blank line closes the group collected so far; runs of
                  # blank lines never produce empty groups.
                  groups.append(current)
                  current = []
          if current:
              groups.append(current)

          result = []
          for group in groups:
              margin = min((len(l) - len(l.lstrip())) for l in group)
              result.append({'indent': margin,
                             'lines': [l[margin:] for l in group]})
          return result

      def findliteralblocks(blocks):
          """Finds literal blocks and adds a 'type' field to the blocks.

          Literal blocks are given the type 'literal', all other blocks are
          given the type 'paragraph'.

          A block whose last line ends with "::" introduces the more deeply
          indented blocks that follow it as literal blocks. The trailing
          "::" marker is removed or reduced to a single colon, depending on
          its form. The list is modified in place and also returned.
          """
          i = 0
          while i < len(blocks):
              # Searching for a block that looks like this:
              #
              # +------------------------------+
              # | paragraph                    |
              # | (ends with "::")             |
              # +------------------------------+
              #    +---------------------------+
              #    | indented literal block    |
              #    +---------------------------+
              blocks[i]['type'] = 'paragraph'
              if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
                  indent = blocks[i]['indent']
                  adjustment = blocks[i+1]['indent'] - indent

                  if blocks[i]['lines'] == ['::']:
                      # Expanded form: remove block
                      del blocks[i]
                      i -= 1
                  elif blocks[i]['lines'][-1].endswith(' ::'):
                      # Partially minimized form: remove space and both
                      # colons.
                      blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                  else:
                      # Fully minimized form: remove just one colon.
                      blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

                  # List items are formatted with a hanging indent. We must
                  # correct for this here while we still have the original
                  # information on the indentation of the subsequent literal
                  # blocks available.
                  #
                  # If the removed "::" block was the very first block, i is
                  # now -1 and there is no introducing block to inspect;
                  # indexing blocks[-1] would wrongly look at the *last*
                  # block instead.
                  if i >= 0:
                      m = _bulletre.match(blocks[i]['lines'][0])
                      if m:
                          indent += m.end()
                          adjustment -= m.end()

                  # Mark the following indented blocks.
                  while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                      blocks[i+1]['type'] = 'literal'
                      blocks[i+1]['indent'] -= adjustment
                      i += 1
              i += 1
          return blocks

        Martin Geisler
    
minirst: support enumerated lists

              r9739
            
      # Start of a bullet or enumerated list item: "-", an alphanumeric
      # label followed by "." (e.g. "1."), or an alphanumeric label followed
      # by ")" with an optional opening "(" (e.g. "a)" or "(2)"), and then a
      # single space. Used with .match(), so it applies at the start of a
      # line; m.end() gives the width of the marker including the space.
      _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)) ')

        Martin Geisler
    
minirst: combine list parsing in one function...

              r9737
            
      # Option list item. Groups: (1) the long option ("--" followed by
      # lowercase letters and dashes), (2) an optional argument introduced
      # by a space or "=" plus the run of at least two spaces that separates
      # it from the description, (3) the description text.
      _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')

      # Field list item of the form ":key: text". The key may not contain a
      # colon, may not start with a colon or a space, and may not end with a
      # space. Groups: (1) key, (2) the spaces after the closing colon,
      # (3) the remaining text.
      _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')

      # Definition list term: when used with .match(), accepts any line
      # whose first character is not a space.
      _definitionre = re.compile(r'[^ ]')

      def splitparagraphs(blocks):
          """Split paragraphs that are really lists into one block per item.

          Every 'paragraph' block whose first line starts a list item is
          replaced, in place, by a sequence of blocks typed 'bullet',
          'option', 'field' or 'definition'. Other blocks are left untouched.
          The (modified) list is returned.
          """
          # Tuples with (list type, item regexp, single line items?). Order
          # matters: the definition list regexp is the least specific one and
          # must therefore be tried last.
          listtypes = [('bullet', _bulletre, True),
                       ('option', _optionre, True),
                       ('field', _fieldre, True),
                       ('definition', _definitionre, False)]

          def match(lines, i, itemre, singleline):
              """Does itemre match an item at line i?

              A list item can be followed by an indented line or another
              list item (but only if singleline is True).
              """
              current = lines[i]
              following = ''
              if i + 1 < len(lines):
                  following = lines[i + 1]
              if itemre.match(current) is None:
                  return False
              if not singleline:
                  return following.startswith(' ')
              return (following == '' or following[0] == ' '
                      or itemre.match(following))

          pos = 0
          while pos < len(blocks):
              block = blocks[pos]
              pos += 1
              if block['type'] != 'paragraph':
                  continue
              lines = block['lines']
              for kind, itemre, singleline in listtypes:
                  if not match(lines, 0, itemre, singleline):
                      continue
                  items = []
                  for j, line in enumerate(lines):
                      if match(lines, j, itemre, singleline):
                          # This line opens a new item.
                          items.append({'type': kind, 'lines': [],
                                        'indent': block['indent']})
                      items[-1]['lines'].append(line)
                  # Swap the paragraph for its items; the newly inserted
                  # blocks are no longer paragraphs, so later iterations
                  # skip over them.
                  blocks[pos - 1:pos] = items
                  break
          return blocks

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      def findsections(blocks):
          """Mark section titles by changing their block type to 'section'.

          The blocks must already carry a 'type' field, i.e., they should
          have been run through findliteralblocks first. The blocks are
          updated in place and the same list is returned.
          """
          for candidate in blocks:
              # A section is a two-line paragraph: the title, followed by an
              # underline of '-' characters of exactly the same length.
              #
              # +------------------------------+
              # | Section title                |
              # | -------------                |
              # +------------------------------+
              if candidate['type'] != 'paragraph':
                  continue
              lines = candidate['lines']
              if len(lines) != 2:
                  continue
              title, underline = lines
              if underline == '-' * len(title):
                  candidate['type'] = 'section'
          return blocks

        Martin Geisler
    
minirst: convert ``foo`` into "foo" upon display...

              r9623
            
      def inlineliterals(blocks):
          """Turn ``foo`` inline literal markup into "foo" in paragraphs.

          Only blocks of type 'paragraph' are rewritten; every occurrence of
          two backquotes in their lines becomes a double quote. Returns the
          same list of blocks.
          """
          for block in blocks:
              if block['type'] != 'paragraph':
                  continue
              block['lines'] = [line.replace('``', '"')
                                for line in block['lines']]
          return blocks

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
      def addmargins(blocks):
          """Adds empty blocks for vertical spacing.

          This groups bullets, options, fields, and definitions together
          with no vertical space between them, and adds an empty block
          (type 'margin') between all other pairs of adjacent blocks.

          The list is modified in place and also returned.
          """
          listtypes = ('bullet', 'option', 'field', 'definition')
          i = 1
          while i < len(blocks):
              if (blocks[i]['type'] == blocks[i-1]['type'] and
                  blocks[i]['type'] in listtypes):
                  # Consecutive items of the same list stay together.
                  i += 1
              else:
                  blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
                  # Skip over both the new margin and the block after it.
                  i += 2
          return blocks

      def formatblock(block, width):
          """Format a block according to width.

          block is a dictionary with 'type', 'indent' and 'lines' fields and
          width is the maximum line width used when wrapping; a width of
          zero or less is replaced by 78. Returns the formatted text as a
          single string without a trailing newline.

          Margins format to the empty string. Literal blocks and sections
          are emitted line by line without re-wrapping (literal blocks get
          two extra spaces of indentation). All other block types are
          wrapped with textwrap, using a hanging indent for list items.

          The block itself is not modified.
          """
          if width <= 0:
              # textwrap rejects non-positive widths (e.g. COLUMNS=0).
              width = 78
          indent = ' ' * block['indent']
          blocktype = block['type']
          if blocktype == 'margin':
              return ''
          if blocktype == 'literal':
              indent += '  '
              return indent + ('\n' + indent).join(block['lines'])
          if blocktype == 'section':
              return indent + ('\n' + indent).join(block['lines'])
          if blocktype == 'definition':
              term = indent + block['lines'][0]
              # The definition text is indented as deep as its last line.
              hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
              defindent = indent + hang * ' '
              text = ' '.join([l.strip() for l in block['lines'][1:]])
              return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                                     initial_indent=defindent,
                                                     subsequent_indent=defindent))

          # Work on a copy of the lines: the field branch below rewrites the
          # first line, and doing that on the block itself would make a
          # second call on the same block produce different output.
          lines = list(block['lines'])
          initindent = subindent = indent
          if blocktype == 'bullet':
              m = _bulletre.match(lines[0])
              if m:
                  subindent = indent + m.end() * ' '
          elif blocktype == 'field':
              m = _fieldre.match(lines[0])
              if m:
                  key, spaces, rest = m.groups()
                  # Turn ":foo: bar" into "foo   bar".
                  lines[0] = '%s  %s%s' % (key, spaces, rest)
                  subindent = indent + (2 + len(key) + len(spaces)) * ' '
          elif blocktype == 'option':
              m = _optionre.match(lines[0])
              if m:
                  option, arg, rest = m.groups()
                  subindent = indent + (len(option) + len(arg)) * ' '

          text = ' '.join([l.strip() for l in lines])
          return textwrap.fill(text, width=width,
                               initial_indent=initindent,
                               subsequent_indent=subindent)

        Martin Geisler
    
minimal reStructuredText parser

              r9156
            
        Martin Geisler
    
help: un-indent help topics...

              r9540
            
      def format(text, width, indent=0):
          """Parse and format the text according to width.

          text is the reStructuredText source, width is the maximum line
          width passed on to formatblock, and indent is a number of columns
          added to the indentation of every block before formatting.
          Returns the formatted text as a single string.
          """
          blocks = findblocks(text)
          for b in blocks:
              b['indent'] += indent
          # The order of the passes matters: literal blocks must be marked
          # before inline markup is rewritten and before paragraphs are
          # split into list items, so that neither touches literal text.
          blocks = findliteralblocks(blocks)
          blocks = inlineliterals(blocks)
          blocks = splitparagraphs(blocks)
          blocks = findsections(blocks)
          blocks = addmargins(blocks)
          return '\n'.join(formatblock(b, width) for b in blocks)

      if __name__ == "__main__":
          # Debugging aid: run the parser passes one by one on the file named
          # by the first command line argument, dumping the blocks after
          # every pass, and finally print the text formatted to 30 columns.
          from pprint import pprint

          def debug(func, blocks):
              """Apply one pass, pretty-print its result and return it."""
              blocks = func(blocks)
              # Single-argument print(...) behaves the same whether print is
              # a statement or a function.
              print("*** after %s:" % func.__name__)
              pprint(blocks)
              print("")
              return blocks

          fp = open(sys.argv[1])
          try:
              text = fp.read()
          finally:
              # Do not leak the file handle.
              fp.close()
          blocks = debug(findblocks, text)
          blocks = debug(findliteralblocks, blocks)
          # Run the same passes, in the same order, as format() does.
          blocks = debug(inlineliterals, blocks)
          blocks = debug(splitparagraphs, blocks)
          blocks = debug(findsections, blocks)
          blocks = debug(addmargins, blocks)
          print('\n'.join(formatblock(b, 30) for b in blocks))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Martin Geisler minimal reStructuredText parser	r9156	# minirst.py - minimal reStructuredText parser
		#
		# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
		#
		# This software may be used and distributed according to the terms of the
Matt Mackall Update license to GPLv2+	r10263	# GNU General Public License version 2 or any later version.
Martin Geisler minimal reStructuredText parser	r9156
		"""simplified reStructuredText parser.

		This parser knows just enough about reStructuredText to parse the
		Mercurial docstrings.

		It cheats in a major way: nested blocks are not really nested. They
		are just indented blocks that look like they are nested. This relies
		on the user to keep the right indentation for the blocks.

		It only supports a small subset of reStructuredText:

Martin Geisler minirst: update module docstring	r9741	- sections

Martin Geisler minimal reStructuredText parser	r9156	- paragraphs

Martin Geisler minirst: update module docstring	r9741	- literal blocks

		- definition lists
Martin Geisler minimal reStructuredText parser	r9156
Martin Geisler minirst: update module docstring	r9741	- bullet lists (items must start with '-')

		- enumerated lists (no autonumbering)
Martin Geisler minimal reStructuredText parser	r9156
Martin Geisler minirst: parse field lists	r9293	- field lists (colons cannot be escaped)

Martin Geisler minimal reStructuredText parser	r9156	- option lists (supports only long options without arguments)

Martin Geisler minirst: update module docstring	r9741	- inline literals (no other inline markup is not recognized)
Martin Geisler minimal reStructuredText parser	r9156	"""

		import re, sys, textwrap


		def findblocks(text):
		"""Find continuous blocks of lines in text.

		Returns a list of dictionaries representing the blocks. Each block
		has an 'indent' field and a 'lines' field.
		"""
		blocks = [[]]
		lines = text.splitlines()
		for line in lines:
		if line.strip():
		blocks[-1].append(line)
		elif blocks[-1]:
		blocks.append([])
		if not blocks[-1]:
		del blocks[-1]

		for i, block in enumerate(blocks):
		indent = min((len(l) - len(l.lstrip())) for l in block)
		blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
		return blocks


		def findliteralblocks(blocks):
		"""Finds literal blocks and adds a 'type' field to the blocks.

		Literal blocks are given the type 'literal', all other blocks are
		given type the 'paragraph'.
		"""
		i = 0
		while i < len(blocks):
		# Searching for a block that looks like this:
		#
		# +------------------------------+
		# \| paragraph \|
		# \| (ends with "::") \|
		# +------------------------------+
		# +---------------------------+
		# \| indented literal block \|
		# +---------------------------+
		blocks[i]['type'] = 'paragraph'
		if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
		indent = blocks[i]['indent']
		adjustment = blocks[i+1]['indent'] - indent

		if blocks[i]['lines'] == ['::']:
		# Expanded form: remove block
		del blocks[i]
		i -= 1
		elif blocks[i]['lines'][-1].endswith(' ::'):
		# Partially minimized form: remove space and both
		# colons.
		blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
		else:
		# Fully minimized form: remove just one colon.
		blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

		# List items are formatted with a hanging indent. We must
		# correct for this here while we still have the original
		# information on the indentation of the subsequent literal
		# blocks available.
Martin Geisler minirst: prepare for general types of bullet lists...	r9738	m = _bulletre.match(blocks[i]['lines'][0])
		if m:
		indent += m.end()
		adjustment -= m.end()
Martin Geisler minimal reStructuredText parser	r9156
		# Mark the following indented blocks.
		while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
		blocks[i+1]['type'] = 'literal'
		blocks[i+1]['indent'] -= adjustment
		i += 1
		i += 1
		return blocks

Martin Geisler minirst: support enumerated lists	r9739	_bulletre = re.compile(r'(-\|[0-9A-Za-z]+\.\|\(?[0-9A-Za-z]+\)) ')
Martin Geisler minirst: combine list parsing in one function...	r9737	_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-])? +)(.)$')
		_fieldre = re.compile(r':(?![: ])([^:])(?<! ):( +)(.)')
		_definitionre = re.compile(r'[^ ]')

		def splitparagraphs(blocks):
		"""Split paragraphs into lists."""
		# Tuples with (list type, item regexp, single line items?). Order
		# matters: definition lists has the least specific regexp and must
		# come last.
		listtypes = [('bullet', _bulletre, True),
		('option', _optionre, True),
		('field', _fieldre, True),
		('definition', _definitionre, False)]

		def match(lines, i, itemre, singleline):
		"""Does itemre match an item at line i?

		A list item can be followed by an idented line or another list
		item (but only if singleline is True).
		"""
		line1 = lines[i]
		line2 = i+1 < len(lines) and lines[i+1] or ''
		if not itemre.match(line1):
		return False
		if singleline:
		return line2 == '' or line2[0] == ' ' or itemre.match(line2)
		else:
		return line2.startswith(' ')

		i = 0
		while i < len(blocks):
		if blocks[i]['type'] == 'paragraph':
		lines = blocks[i]['lines']
		for type, itemre, singleline in listtypes:
		if match(lines, 0, itemre, singleline):
		items = []
		for j, line in enumerate(lines):
		if match(lines, j, itemre, singleline):
		items.append(dict(type=type, lines=[],
		indent=blocks[i]['indent']))
		items[-1]['lines'].append(line)
		blocks[i:i+1] = items
		break
		i += 1
		return blocks

Martin Geisler minimal reStructuredText parser	r9156
		def findsections(blocks):
		"""Finds sections.

		The blocks must have a 'type' field, i.e., they should have been
		run through findliteralblocks first.
		"""
		for block in blocks:
		# Searching for a block that looks like this:
		#
		# +------------------------------+
		# \| Section title \|
		# \| ------------- \|
		# +------------------------------+
		if (block['type'] == 'paragraph' and
		len(block['lines']) == 2 and
		block['lines'][1] == '-' * len(block['lines'][0])):
		block['type'] = 'section'
		return blocks


Martin Geisler minirst: convert ``foo`` into "foo" upon display...	r9623	def inlineliterals(blocks):
		for b in blocks:
		if b['type'] == 'paragraph':
		b['lines'] = [l.replace('``', '"') for l in b['lines']]
		return blocks


Martin Geisler minimal reStructuredText parser	r9156	def addmargins(blocks):
		"""Adds empty blocks for vertical spacing.

		This groups bullets, options, and definitions together with no vertical
		space between them, and adds an empty block between all other blocks.
		"""
		i = 1
		while i < len(blocks):
		if (blocks[i]['type'] == blocks[i-1]['type'] and
Martin Geisler minirst: parse field lists	r9293	blocks[i]['type'] in ('bullet', 'option', 'field', 'definition')):
Martin Geisler minimal reStructuredText parser	r9156	i += 1
		else:
		blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
		i += 2
		return blocks


		def formatblock(block, width):
		"""Format a block according to width."""
Martin Geisler util, minirst: do not crash with COLUMNS=0	r9417	if width <= 0:
		width = 78
Martin Geisler minimal reStructuredText parser	r9156	indent = ' ' * block['indent']
		if block['type'] == 'margin':
		return ''
Martin Geisler minirst: remove unnecessary "elif:" statements	r9735	if block['type'] == 'literal':
Martin Geisler minirst: indent literal blocks with two spaces...	r9291	indent += ' '
		return indent + ('\n' + indent).join(block['lines'])
Martin Geisler minirst: remove unnecessary "elif:" statements	r9735	if block['type'] == 'section':
Martin Geisler minimal reStructuredText parser	r9156	return indent + ('\n' + indent).join(block['lines'])
Martin Geisler minirst: remove unnecessary "elif:" statements	r9735	if block['type'] == 'definition':
Martin Geisler minimal reStructuredText parser	r9156	term = indent + block['lines'][0]
Martin Geisler minirst: combine list parsing in one function...	r9737	hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
		defindent = indent + hang * ' '
Martin Geisler minimal reStructuredText parser	r9156	text = ' '.join(map(str.strip, block['lines'][1:]))
		return "%s\n%s" % (term, textwrap.fill(text, width=width,
		initial_indent=defindent,
		subsequent_indent=defindent))
Martin Geisler minirst: remove unnecessary "elif:" statements	r9735	initindent = subindent = indent
		if block['type'] == 'bullet':
Martin Geisler minirst: prepare for general types of bullet lists...	r9738	m = _bulletre.match(block['lines'][0])
		if m:
		subindent = indent + m.end() * ' '
Martin Geisler minirst: combine list parsing in one function...	r9737	elif block['type'] == 'field':
		m = _fieldre.match(block['lines'][0])
		if m:
		key, spaces, rest = m.groups()
		# Turn ":foo: bar" into "foo bar".
		block['lines'][0] = '%s %s%s' % (key, spaces, rest)
		subindent = indent + (2 + len(key) + len(spaces)) * ' '
		elif block['type'] == 'option':
		m = _optionre.match(block['lines'][0])
		if m:
		option, arg, rest = m.groups()
		subindent = indent + (len(option) + len(arg)) * ' '
Martin Geisler minimal reStructuredText parser	r9156
Martin Geisler minirst: combine list parsing in one function...	r9737	text = ' '.join(map(str.strip, block['lines']))
Martin Geisler minirst: remove unnecessary "elif:" statements	r9735	return textwrap.fill(text, width=width,
		initial_indent=initindent,
		subsequent_indent=subindent)
Martin Geisler minimal reStructuredText parser	r9156

Martin Geisler help: un-indent help topics...	r9540	def format(text, width, indent=0):
Martin Geisler minimal reStructuredText parser	r9156	"""Parse and format the text according to width."""
		blocks = findblocks(text)
Martin Geisler help: un-indent help topics...	r9540	for b in blocks:
		b['indent'] += indent
Martin Geisler minimal reStructuredText parser	r9156	blocks = findliteralblocks(blocks)
Martin Geisler minirst: convert ``foo`` into "foo" upon display...	r9623	blocks = inlineliterals(blocks)
Martin Geisler minirst: combine list parsing in one function...	r9737	blocks = splitparagraphs(blocks)
Martin Geisler minimal reStructuredText parser	r9156	blocks = findsections(blocks)
		blocks = addmargins(blocks)
		return '\n'.join(formatblock(b, width) for b in blocks)


		if __name__ == "__main__":
		from pprint import pprint

		def debug(func, blocks):
		blocks = func(blocks)
		print "*** after %s:" % func.__name__
		pprint(blocks)
		print
		return blocks

		text = open(sys.argv[1]).read()
		blocks = debug(findblocks, text)
		blocks = debug(findliteralblocks, blocks)
Martin Geisler minirst: combine list parsing in one function...	r9737	blocks = debug(splitparagraphs, blocks)
Martin Geisler minimal reStructuredText parser	r9156	blocks = debug(findsections, blocks)
		blocks = debug(addmargins, blocks)
		print '\n'.join(formatblock(b, 30) for b in blocks)