upstream/ipython Files · IPython/nbconvert/filters/strings.py

Merge pull request from minrk/nbformat4...

Merge pull request from minrk/nbformat4 nbformat v4

MinRK - - Load All Authors

File last commit:

r18596:2d590459


                r18617:482c7bd6

Download file

             strings.py
        
                    221 lines
            
             | 5.6 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / nbconvert / filters / strings.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
      # coding: utf-8

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
      """String filters.

        Jonathan Frederic
    
Moved wrap code into Strings utility file.

              r10433
            
        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
      Contains a collection of useful string manipulation filters for use in Jinja

      templates.

        Jonathan Frederic
    
Moved wrap code into Strings utility file.

              r10433
            
      """

        MinRK
    
remove heading cells in v4

              r18596
            
      # Copyright (c) IPython Development Team.

      # Distributed under the terms of the Modified BSD License.

        Jonathan Frederic
    
Moved wrap code into Strings utility file.

              r10433
            
        MinRK
    
add posix_path filter...

              r11972
            
      import os

        MinRK
    
fix `file/` URL replacements in nbconvert

              r11202
            
      import re

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
      import textwrap

        Thomas Kluyver
    
Fixes for nbconvert under Python 3

              r13397
            
      try:

          from urllib.parse import quote  # Py 3

      except ImportError:

          from urllib2 import quote  # Py 2

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
      from xml.etree import ElementTree

        MinRK
    
convert IPython syntax to Python syntax in nbconvert python template...

              r11711
            
      from IPython.core.interactiveshell import InteractiveShell

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
      from IPython.utils import py3compat

        Brian E. Granger
    
Fixing import for nbconvert.

              r11089
            
        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
        Brian E. Granger
    
Fixing import logic.

              r11088
            
      __all__ = [

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
          'wrap_text',

          'html2text',

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
          'add_anchor',

        Brian E. Granger
    
Fixing import logic.

              r11088
            
          'strip_dollars',

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
          'strip_files_prefix',

          'comment_lines',

        MinRK
    
convert IPython syntax to Python syntax in nbconvert python template...

              r11711
            
          'get_lines',

          'ipython2python',

        MinRK
    
add posix_path filter...

              r11972
            
          'posix_path',

        MinRK
    
fix markdown images...

              r12451
            
          'path2url',

        MinRK
    
add ascii_only filter

              r15434
            
          'add_prompts',

          'ascii_only',

        Benjamin ABEL
    
Fix issue #5877 with tests...

              r18427
            
          'prevent_list_blocks',

        Brian E. Granger
    
Fixing import logic.

              r11088
            
      ]

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
      def wrap_text(text, width=100):

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          """ 

          Intelligently wrap text.

          Wrap text without breaking words if possible.

          Parameters

          ----------

          text : str

              Text to wrap.

          width : int, optional

              Number of characters to wrap to, default 100.

          """

        Jonathan Frederic
    
Moved wrap code into Strings utility file.

              r10433
            
        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          split_text = text.split('\n')

          wrp = map(lambda x:textwrap.wrap(x,width), split_text)

        Jonathan Frederic
    
Moved wrap code into Strings utility file.

              r10433
            
          wrpd = map('\n'.join, wrp)

        Jonathan Frederic
    
Moved more code to Strings utilities file

              r10434
            
          return '\n'.join(wrpd)

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
        Jonathan Frederic
    
Filter names cleanup

              r11685
            
      def html2text(element):

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
          """extract inner text from html

          Analog of jQuery's $(element).text()

          """

        Jonathan Frederic
    
Fixes for Py3.3

              r11547
            
          if isinstance(element, py3compat.string_types):

        MinRK
    
don't do anything if add_anchor fails

              r13844
            
              try:

                  element = ElementTree.fromstring(element)

              except Exception:

                  # failed to parse, just return it unmodified

                  return element

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
          text = element.text or ""

          for child in element:

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
              text += html2text(child)

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
          text += (element.tail or "")

          return text

      def add_anchor(html):

        MinRK
    
remove heading cells in v4

              r18596
            
          """Add an anchor-link to an html header

        MinRK
    
allow extra pandoc args

              r11293
            
        MinRK
    
remove heading cells in v4

              r18596
            
          For use on markdown headings

        MinRK
    
allow extra pandoc args

              r11293
            
          """

        MinRK
    
don't do anything if add_anchor fails

              r13844
            
          try:

              h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))

          except Exception:

              # failed to parse, just return it unmodified

              return html

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
          link = html2text(h).replace(' ', '-')

        MinRK
    
add html_text and add_anchor filters...

              r11302
            
          h.set('id', link)

          a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})

          a.text = u'¶'

          h.append(a)

        Jonathan Frederic
    
Moved add_anchor bytes-strings fix into add_anchor

              r11927
            
          # Known issue of Python3.x, ElementTree.tostring() returns a byte string

          # instead of a text string.  See issue http://bugs.python.org/issue10942

          # Workaround is to make sure the bytes are casted to a string.

        Jonathan Frederic
    
Simplify decode to unicode

              r11946
            
          return py3compat.decode(ElementTree.tostring(h), 'utf-8')

        MinRK
    
allow extra pandoc args

              r11293
            
        Jonathan Frederic
    
Moved more code to Strings utilities file

              r10434
            
        Jonathan Frederic
    
Moved add_prompts into correct location.

              r12708
            
      def add_prompts(code, first='>>> ', cont='... '):
          """Prefix every line of a code snippet with an interpreter-style prompt.

          Parameters
          ----------
          code : str
              Source text; it is split on newline characters.
          first : str, optional
              Prompt placed in front of the first line, default '>>> '.
          cont : str, optional
              Prompt placed in front of every following line, default '... '.

          Returns
          -------
          str
              The prompted lines joined back together with newlines.
          """
          lines = code.split('\n')
          # str.split always yields at least one item, so lines[0] is safe
          # even for an empty snippet.
          prompted = [first + lines[0]]
          prompted.extend(cont + rest for rest in lines[1:])
          return '\n'.join(prompted)

        Jonathan Frederic
    
Moved more code to Strings utilities file

              r10434
            
      def strip_dollars(text):

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          """

          Strip leading and trailing dollar symbols from text

          Parameters

          ----------

          text : str

              Text to remove dollars from

          """

        Jonathan Frederic
    
Moved more code to Strings utilities file

              r10434
            
        Jonathan Frederic
    
Post code-review, extended refactor.

              r10485
            
          return text.strip('$')

        jakobgager
    
Small latex mods: Escapes, Headings, Equations...

              r10882
            
        MinRK
    
update patterns for strip_files filter...

              r13868
            
      # Matches an HTML `src=` or `href=` attribute whose value starts with an
      # optional quote character and an optional leading slash followed by
      # `files/`.  Group 1 is the attribute name, group 2 the quote (if any).
      files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')

      # Matches a markdown link, or an image when prefixed with `!`, whose target
      # starts with an optional slash followed by `files/`.  Group 1 is the
      # optional `!`, `caption` is the bracketed text and `location` is the
      # remainder of the target after the `files/` prefix.
      markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')

        MinRK
    
fix `file/` URL replacements in nbconvert

              r11202
            
        Jonathan Frederic
    
Filter names cleanup

              r11685
            
      def strip_files_prefix(text):

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          """

        Peter Davis
    
`strip_files_prefix` now also strips markdown style links, `latex_base` updated to include filter

              r13593
            
          Fix all fake URLs that start with `files/`, stripping out the `files/` prefix.

          Applies to both urls (for html) and relative paths (for markdown paths).

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          Parameters

          ----------

          text : str

        MinRK
    
fix `file/` URL replacements in nbconvert

              r11202
            
              Text in which to replace 'src="files/real...' with 'src="real...'

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          """

        Peter Davis
    
`strip_files_prefix` now also strips markdown style links, `latex_base` updated to include filter

              r13593
            
          cleaned_text = files_url_pattern.sub(r"\1=\2", text)

        MinRK
    
update patterns for strip_files filter...

              r13868
            
          cleaned_text = markdown_url_pattern.sub(r'\1[\2](\3)', cleaned_text)

        Peter Davis
    
`strip_files_prefix` now also strips markdown style links, `latex_base` updated to include filter

              r13593
            
          return cleaned_text

        Jonathan Frederic
    
Post code-review, extended refactor.

              r10485
            
        Jonathan Frederic
    
Filter names cleanup

              r11685
            
      def comment_lines(text, prefix='# '):

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          """

          Build a Python comment line from input text.

          Parameters

          ----------

          text : str

              Text to comment out.

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
          prefix : str

              String to prepend to the start of each line.

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          """

          #Replace line breaks with line breaks and comment symbols.

          #Also add a comment symbol at the beginning to comment out

          #the first line.

        Jonathan Frederic
    
Filter names cleanup

              r11685
            
          return prefix + ('\n'+prefix).join(text.split('\n')) 

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
        Jonathan Frederic
    
Post code-review, extended refactor.

              r10485
            
        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
      def get_lines(text, start=None,end=None):

        Jonathan Frederic
    
Post code-review, extended refactor.

              r10485
            
          """

          Split the input text into separate lines and then return the 

          lines that the caller is interested in.

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          Parameters

          ----------

          text : str

              Text to parse lines from.

          start : int, optional

              First line to grab from.

          end : int, optional

              Last line to grab from.

        Jonathan Frederic
    
Post code-review, extended refactor.

              r10485
            
          """

          # Split the input into lines.

        Jonathan Frederic
    
Cleanup and refactor of filters

              r10676
            
          lines = text.split("\n")

        Jonathan Frederic
    
Post code-review, extended refactor.

              r10485
            
          # Return the right lines.

          return "\n".join(lines[start:end]) #re-join

        MinRK
    
convert IPython syntax to Python syntax in nbconvert python template...

              r11711
            
      def ipython2python(code):
          """Convert IPython syntax into pure Python syntax.

          The conversion is delegated to the input transformer manager of the
          shared ``InteractiveShell`` instance.

          Parameters
          ----------
          code : str
              IPython code to be transformed to pure Python.

          Returns
          -------
          The value returned by ``transform_cell`` for the given code.
          """
          transformer = InteractiveShell.instance().input_transformer_manager
          return transformer.transform_cell(code)

        MinRK
    
add posix_path filter...

              r11972
            
      def posix_path(path):
          """Return `path` written with forward slashes (path/to/etc).

          Mainly for use in latex on Windows, where native Windows paths
          are not allowed.

          Parameters
          ----------
          path : str
              Path that uses the platform's native separator.

          Returns
          -------
          str
              The same path with every native separator replaced by '/'.
          """
          native_sep = os.path.sep
          if native_sep == '/':
              # Already posix-style on this platform; nothing to replace.
              return path
          return path.replace(native_sep, '/')

        MinRK
    
fix markdown images...

              r12451
            
      def path2url(path):
          """Turn a file path into a URL.

          The path is split on the platform's separator, each component is
          percent-quoted on its own, and the components are rejoined with '/'.
          """
          quoted_parts = [quote(piece) for piece in path.split(os.path.sep)]
          return '/'.join(quoted_parts)

        MinRK
    
add ascii_only filter

              r15434
            
      def ascii_only(s):

          """ensure a string is ascii"""

          s = py3compat.cast_unicode(s)

        Benjamin ABEL
    
Fix issue #5877 with tests...

              r18427
            
          return s.encode('ascii', 'replace').decode('ascii')

      def prevent_list_blocks(s):
          """
          Prevent presence of enumerate or itemize blocks in latex headings cells

          A list marker at the very start of the string (after optional
          whitespace) is escaped with a backslash: a period, optionally
          preceded by digits, or one of the bullet characters '-', '+', '*'.
          Only the start of the string is examined, because the patterns are
          anchored with '^' and re.MULTILINE is not used.

          Parameters
          ----------
          s : str
              Heading text to protect.

          Returns
          -------
          str
              The text with a leading list marker, if any, backslash-escaped.
          """
          # Raw strings keep the regex escapes away from Python's own
          # string-literal escape processing, which warns about unknown escapes
          # such as \s or \d in ordinary literals.  The resulting pattern and
          # replacement values are unchanged.
          out = re.sub(r'(^\s*\d*)\.', r'\1\.', s)
          out = re.sub(r'(^\s*)\-', r'\1\-', out)
          out = re.sub(r'(^\s*)\+', r'\1\+', out)
          out = re.sub(r'(^\s*)\*', r'\1\*', out)
          return out

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

MinRK add html_text and add_anchor filters...	r11302	# coding: utf-8
Jonathan Frederic Cleanup and refactor of filters	r10676	"""String filters.
Jonathan Frederic Moved wrap code into Strings utility file.	r10433
Jonathan Frederic Cleanup and refactor of filters	r10676	Contains a collection of useful string manipulation filters for use in Jinja
		templates.
Jonathan Frederic Moved wrap code into Strings utility file.	r10433	"""

MinRK remove heading cells in v4	r18596	# Copyright (c) IPython Development Team.
		# Distributed under the terms of the Modified BSD License.
Jonathan Frederic Moved wrap code into Strings utility file.	r10433
MinRK add posix_path filter...	r11972	import os
MinRK fix `file/` URL replacements in nbconvert	r11202	import re
Jonathan Frederic Cleanup and refactor of filters	r10676	import textwrap
Thomas Kluyver Fixes for nbconvert under Python 3	r13397	try:
		from urllib.parse import quote # Py 3
		except ImportError:
		from urllib2 import quote # Py 2
MinRK add html_text and add_anchor filters...	r11302	from xml.etree import ElementTree
MinRK convert IPython syntax to Python syntax in nbconvert python template...	r11711
		from IPython.core.interactiveshell import InteractiveShell
MinRK add html_text and add_anchor filters...	r11302	from IPython.utils import py3compat
Brian E. Granger Fixing import for nbconvert.	r11089
Jonathan Frederic Cleanup and refactor of filters	r10676
Brian E. Granger Fixing import logic.	r11088	__all__ = [
Jonathan Frederic Filter names cleanup	r11685	'wrap_text',
		'html2text',
MinRK add html_text and add_anchor filters...	r11302	'add_anchor',
Brian E. Granger Fixing import logic.	r11088	'strip_dollars',
Jonathan Frederic Filter names cleanup	r11685	'strip_files_prefix',
		'comment_lines',
MinRK convert IPython syntax to Python syntax in nbconvert python template...	r11711	'get_lines',
		'ipython2python',
MinRK add posix_path filter...	r11972	'posix_path',
MinRK fix markdown images...	r12451	'path2url',
MinRK add ascii_only filter	r15434	'add_prompts',
		'ascii_only',
Benjamin ABEL Fix issue #5877 with tests...	r18427	'prevent_list_blocks',
Brian E. Granger Fixing import logic.	r11088	]


Jonathan Frederic Filter names cleanup	r11685	def wrap_text(text, width=100):
Jonathan Frederic Cleanup and refactor of filters	r10676	"""
		Intelligently wrap text.
		Wrap text without breaking words if possible.

		Parameters
		----------
		text : str
		Text to wrap.
		width : int, optional
		Number of characters to wrap to, default 100.
		"""
Jonathan Frederic Moved wrap code into Strings utility file.	r10433
Jonathan Frederic Cleanup and refactor of filters	r10676	split_text = text.split('\n')
		wrp = map(lambda x:textwrap.wrap(x,width), split_text)
Jonathan Frederic Moved wrap code into Strings utility file.	r10433	wrpd = map('\n'.join, wrp)
Jonathan Frederic Moved more code to Strings utilities file	r10434	return '\n'.join(wrpd)

MinRK add html_text and add_anchor filters...	r11302
Jonathan Frederic Filter names cleanup	r11685	def html2text(element):
MinRK add html_text and add_anchor filters...	r11302	"""extract inner text from html

		Analog of jQuery's $(element).text()
		"""
Jonathan Frederic Fixes for Py3.3	r11547	if isinstance(element, py3compat.string_types):
MinRK don't do anything if add_anchor fails	r13844	try:
		element = ElementTree.fromstring(element)
		except Exception:
		# failed to parse, just return it unmodified
		return element
MinRK add html_text and add_anchor filters...	r11302
		text = element.text or ""
		for child in element:
Jonathan Frederic Filter names cleanup	r11685	text += html2text(child)
MinRK add html_text and add_anchor filters...	r11302	text += (element.tail or "")
		return text


		def add_anchor(html):
MinRK remove heading cells in v4	r18596	"""Add an anchor-link to an html header
MinRK allow extra pandoc args	r11293
MinRK remove heading cells in v4	r18596	For use on markdown headings
MinRK allow extra pandoc args	r11293	"""
MinRK don't do anything if add_anchor fails	r13844	try:
		h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
		except Exception:
		# failed to parse, just return it unmodified
		return html
Jonathan Frederic Filter names cleanup	r11685	link = html2text(h).replace(' ', '-')
MinRK add html_text and add_anchor filters...	r11302	h.set('id', link)
		a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
		a.text = u'¶'
		h.append(a)
Jonathan Frederic Moved add_anchor bytes-strings fix into add_anchor	r11927
		# Known issue of Python3.x, ElementTree.tostring() returns a byte string
		# instead of a text string. See issue http://bugs.python.org/issue10942
		# Workaround is to make sure the bytes are casted to a string.
Jonathan Frederic Simplify decode to unicode	r11946	return py3compat.decode(ElementTree.tostring(h), 'utf-8')
MinRK allow extra pandoc args	r11293
Jonathan Frederic Moved more code to Strings utilities file	r10434
Jonathan Frederic Moved add_prompts into correct location.	r12708	def add_prompts(code, first='>>> ', cont='... '):
		"""Add prompts to code snippets"""
		new_code = []
		code_list = code.split('\n')
		new_code.append(first + code_list[0])
		for line in code_list[1:]:
		new_code.append(cont + line)
		return '\n'.join(new_code)


Jonathan Frederic Moved more code to Strings utilities file	r10434	def strip_dollars(text):
Jonathan Frederic Cleanup and refactor of filters	r10676	"""
		Strip leading and trailing dollar symbols from text

		Parameters
		----------
		text : str
		Text to remove dollars from
		"""
Jonathan Frederic Moved more code to Strings utilities file	r10434
Jonathan Frederic Post code-review, extended refactor.	r10485	return text.strip('$')

jakobgager Small latex mods: Escapes, Headings, Equations...	r10882
MinRK update patterns for strip_files filter...	r13868	files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
		markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')
MinRK fix `file/` URL replacements in nbconvert	r11202
Jonathan Frederic Filter names cleanup	r11685	def strip_files_prefix(text):
Jonathan Frederic Cleanup and refactor of filters	r10676	"""
Peter Davis `strip_files_prefix` now also strips markdown style links, `latex_base` updated to include filter	r13593	Fix all fake URLs that start with `files/`, stripping out the `files/` prefix.
		Applies to both urls (for html) and relative paths (for markdown paths).
Jonathan Frederic Cleanup and refactor of filters	r10676
		Parameters
		----------
		text : str
MinRK fix `file/` URL replacements in nbconvert	r11202	Text in which to replace 'src="files/real...' with 'src="real...'
Jonathan Frederic Cleanup and refactor of filters	r10676	"""
Peter Davis `strip_files_prefix` now also strips markdown style links, `latex_base` updated to include filter	r13593	cleaned_text = files_url_pattern.sub(r"\1=\2", text)
MinRK update patterns for strip_files filter...	r13868	cleaned_text = markdown_url_pattern.sub(r'\1[\2](\3)', cleaned_text)
Peter Davis `strip_files_prefix` now also strips markdown style links, `latex_base` updated to include filter	r13593	return cleaned_text
Jonathan Frederic Post code-review, extended refactor.	r10485

Jonathan Frederic Filter names cleanup	r11685	def comment_lines(text, prefix='# '):
Jonathan Frederic Cleanup and refactor of filters	r10676	"""
		Build a Python comment line from input text.

		Parameters
		----------
		text : str
		Text to comment out.
Jonathan Frederic Filter names cleanup	r11685	prefix : str
		String to prepend to the start of each line.
Jonathan Frederic Cleanup and refactor of filters	r10676	"""

		#Replace line breaks with line breaks and comment symbols.
		#Also add a comment symbol at the beginning to comment out
		#the first line.
Jonathan Frederic Filter names cleanup	r11685	return prefix + ('\n'+prefix).join(text.split('\n'))
Jonathan Frederic Cleanup and refactor of filters	r10676
Jonathan Frederic Post code-review, extended refactor.	r10485
Jonathan Frederic Cleanup and refactor of filters	r10676	def get_lines(text, start=None,end=None):
Jonathan Frederic Post code-review, extended refactor.	r10485	"""
		Split the input text into separate lines and then return the
		lines that the caller is interested in.
Jonathan Frederic Cleanup and refactor of filters	r10676
		Parameters
		----------
		text : str
		Text to parse lines from.
		start : int, optional
		First line to grab from.
		end : int, optional
		Last line to grab from.
Jonathan Frederic Post code-review, extended refactor.	r10485	"""

		# Split the input into lines.
Jonathan Frederic Cleanup and refactor of filters	r10676	lines = text.split("\n")
Jonathan Frederic Post code-review, extended refactor.	r10485
		# Return the right lines.
		return "\n".join(lines[start:end]) #re-join
MinRK convert IPython syntax to Python syntax in nbconvert python template...	r11711
		def ipython2python(code):
		"""Transform IPython syntax to pure Python syntax

		Parameters
		----------

		code : str
		IPython code, to be transformed to pure Python
		"""
		shell = InteractiveShell.instance()
		return shell.input_transformer_manager.transform_cell(code)
MinRK add posix_path filter...	r11972
		def posix_path(path):
		"""Turn a path into posix-style path/to/etc

		Mainly for use in latex on Windows,
		where native Windows paths are not allowed.
		"""
		if os.path.sep != '/':
		return path.replace(os.path.sep, '/')
		return path
MinRK fix markdown images...	r12451
		def path2url(path):
		"""Turn a file path into a URL"""
		parts = path.split(os.path.sep)
		return '/'.join(quote(part) for part in parts)
MinRK add ascii_only filter	r15434
		def ascii_only(s):
		"""ensure a string is ascii"""
		s = py3compat.cast_unicode(s)
Benjamin ABEL Fix issue #5877 with tests...	r18427	return s.encode('ascii', 'replace').decode('ascii')

		def prevent_list_blocks(s):
		"""
		Prevent presence of enumerate or itemize blocks in latex headings cells
		"""
		out = re.sub('(^\s*\d*)\.', '\\1\.', s)
		out = re.sub('(^\s*)\-', '\\1\-', out)
		out = re.sub('(^\s*)\+', '\\1\+', out)
		out = re.sub('(^\s*)\*', '\\1\*', out)
		return out