upstream/ipython Commit - r13844:f7f20b52

don't do anything if add_anchor fails

MinRK -

r13844:f7f20b52

parent child

IPython/nbconvert/filters/strings.py

0 +8 0

              # coding: utf-8
              """String filters.
              Contains a collection of useful string manipulation filters for use in Jinja
              templates.
              """
              #-----------------------------------------------------------------------------
              # Copyright (c) 2013, the IPython Development Team.
              #
              # Distributed under the terms of the Modified BSD License.
              #
              # The full license is in the file COPYING.txt, distributed with this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              import os
              import re
              import textwrap
              try:
                  from urllib.parse import quote  # Py 3
              except ImportError:
                  from urllib2 import quote  # Py 2
              from xml.etree import ElementTree
              from IPython.core.interactiveshell import InteractiveShell
              from IPython.utils import py3compat
              #-----------------------------------------------------------------------------
              # Functions
              #-----------------------------------------------------------------------------
              # Public API of this module: the names exported by ``import *``.
              # Every entry is a filter function defined further down in this file.
              __all__ = [
                  'wrap_text',
                  'html2text',
                  'add_anchor',
                  'strip_dollars',
                  'strip_files_prefix',
                  'comment_lines',
                  'get_lines',
                  'ipython2python',
                  'posix_path',
                  'path2url',
                  'add_prompts'
              ]
              def wrap_text(text, width=100):
                  """
                  Wrap text to a maximum line width, one input line at a time.

                  Each newline-separated line of ``text`` is wrapped on its own with
                  ``textwrap.wrap``, so existing line breaks are kept and words are
                  not broken when that can be avoided.

                  Parameters
                  ----------
                  text : str
                      Text to wrap.
                  width : int, optional
                      Number of characters to wrap to, default 100.

                  Returns
                  -------
                  str
                      The wrapped text, with all resulting lines joined by newlines.
                  """
                  wrapped_lines = []
                  for line in text.split('\n'):
                      # textwrap.wrap returns a list of pieces (empty for a blank line)
                      pieces = textwrap.wrap(line, width)
                      wrapped_lines.append('\n'.join(pieces))
                  return '\n'.join(wrapped_lines)
              def html2text(element):
                  """extract inner text from html
                  Analog of jQuery's $(element).text()

                  Parameters
                  ----------
                  element : str or element object
                      Either a markup string (anything matching
                      ``py3compat.string_types``), which is parsed with
                      ``ElementTree.fromstring``, or an already-parsed element.

                  Returns
                  -------
                  str
                      The element's own text, followed by the text of each child
                      (collected recursively), followed by the element's tail.
                      A string that cannot be parsed is returned unmodified.
                  """
                  if isinstance(element, py3compat.string_types):
+                     try:
                      element = ElementTree.fromstring(element)
+                     except Exception:
+                         # failed to parse, just return it unmodified
+                         return element
                  # text before the first child, then every child, then the tail
                  text = element.text or ""
                  for child in element:
                      text += html2text(child)
                  text += (element.tail or "")
                  return text
              def add_anchor(html):
                  """Add an anchor-link to an html header tag
                  For use in heading cells

                  Parameters
                  ----------
                  html : str
                      Markup of a single element (for example a header tag).

                  Returns
                  -------
                  str
                      The markup with an ``id`` attribute set on the element and an
                      ``<a class="anchor-link">`` child appended that links to that
                      id.  If the input cannot be parsed it is returned unmodified.
                  """
+                 try:
                  h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
+                 except Exception:
+                     # failed to parse, just return it unmodified
+                     return html
                  # the anchor id is the element's inner text with spaces turned into dashes
                  link = html2text(h).replace(' ', '-')
                  h.set('id', link)
                  a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
                  a.text = u'¶'
                  h.append(a)
                  # Known issue of Python3.x, ElementTree.tostring() returns a byte string
                  # instead of a text string.  See issue http://bugs.python.org/issue10942
                  # Workaround is to make sure the bytes are cast to a string.
                  return py3compat.decode(ElementTree.tostring(h), 'utf-8')
              def add_prompts(code, first='>>> ', cont='... '):
                  """Prefix a code snippet with interactive-style prompts.

                  Parameters
                  ----------
                  code : str
                      Code snippet, possibly spanning several lines.
                  first : str, optional
                      Prompt put in front of the first line, default ``'>>> '``.
                  cont : str, optional
                      Prompt put in front of every following line, default ``'... '``.

                  Returns
                  -------
                  str
                      The snippet with a prompt at the start of each line.
                  """
                  lines = code.split('\n')
                  # str.split always yields at least one item, so lines[0] is safe
                  prompted = [first + lines[0]]
                  prompted.extend(cont + line for line in lines[1:])
                  return '\n'.join(prompted)
              def strip_dollars(text):
                  """
                  Strip dollar symbols from both ends of text.

                  Only leading and trailing ``$`` characters are removed; a ``$``
                  in the interior of the string is left untouched.

                  Parameters
                  ----------
                  text : str
                      Text to remove dollars from

                  Returns
                  -------
                  str
                      ``text`` without its leading and trailing dollar symbols.
                  """
                  without_leading = text.lstrip('$')
                  return without_leading.rstrip('$')
              # Matches a src/href attribute whose (optionally quoted) value starts with
              # ``files/``; group 1 is the attribute name, group 2 the opening quote.
              files_url_pattern = re.compile(r'(src|href)\=([\'"]?)files/')
              def strip_files_prefix(text):
                  """
                  Drop the fake ``files/`` prefix from ``src`` and ``href`` URLs.

                  Parameters
                  ----------
                  text : str
                      Text in which to replace 'src="files/real...' with 'src="real...'

                  Returns
                  -------
                  str
                      ``text`` with every matched ``files/`` prefix removed.
                  """
                  def _without_prefix(match):
                      # rebuild ``attr=`` plus the opening quote, leaving out ``files/``
                      attribute, opening_quote = match.groups()
                      return attribute + '=' + opening_quote
                  return files_url_pattern.sub(_without_prefix, text)
              def comment_lines(text, prefix='# '):
                  """
                  Comment out text by putting a prefix in front of every line.

                  Parameters
                  ----------
                  text : str
                      Text to comment out.
                  prefix : str
                      String prepended to the start of each line, default ``'# '``.

                  Returns
                  -------
                  str
                      ``text`` with ``prefix`` at the start of every line.
                  """
                  # Every line gets the prefix, including the first one and empty lines.
                  commented = [prefix + line for line in text.split('\n')]
                  return '\n'.join(commented)
              def get_lines(text, start=None, end=None):
                  """
                  Return a range of lines from the input text.

                  The text is split on newlines and ``start``/``end`` are applied as
                  ordinary slice bounds, so negative values count from the end and
                  out-of-range values are clipped.

                  Parameters
                  ----------
                  text : str
                      Text to parse lines from.
                  start : int, optional
                      Index of the first line to keep (zero-based).
                  end : int, optional
                      Index at which to stop; the line at ``end`` itself is excluded.

                  Returns
                  -------
                  str
                      The selected lines, joined by newlines.
                  """
                  wanted = slice(start, end)
                  selected = text.split("\n")[wanted]
                  return "\n".join(selected)
              def ipython2python(code):
                  """Transform IPython syntax to pure Python syntax

                  The work is delegated to the input transformer manager of the
                  ``InteractiveShell`` instance returned by ``InteractiveShell.instance()``.

                  Parameters
                  ----------
                  code : str
                      IPython code, to be transformed to pure Python
                  """
                  transformer = InteractiveShell.instance().input_transformer_manager
                  return transformer.transform_cell(code)
              def posix_path(path):
                  """Turn a path into posix-style path/to/etc

                  Mainly for use in latex on Windows,
                  where native Windows paths are not allowed.

                  Parameters
                  ----------
                  path : str
                      Path using the platform's native separator.

                  Returns
                  -------
                  str
                      The path with every native separator replaced by ``/``; returned
                      as-is when the native separator already is ``/``.
                  """
                  native_sep = os.path.sep
                  return path if native_sep == '/' else path.replace(native_sep, '/')
              def path2url(path):
                  """Turn a file path into a URL

                  The path is split on the platform's native separator, each part is
                  percent-quoted with ``quote``, and the parts are joined with ``/``.
                  """
                  quoted_parts = [quote(segment) for segment in path.split(os.path.sep)]
                  return '/'.join(quoted_parts)

IPython/nbconvert/filters/tests/test_strings.py

0 +5 0

              """
              Module with tests for Strings
              """
              #-----------------------------------------------------------------------------
              # Copyright (c) 2013, the IPython Development Team.
              #
              # Distributed under the terms of the Modified BSD License.
              #
              # The full license is in the file COPYING.txt, distributed with this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
              import os
              from ...tests.base import TestsBase
              from ..strings import (wrap_text, html2text, add_anchor, strip_dollars,
                  strip_files_prefix, get_lines, comment_lines, ipython2python, posix_path,
                  add_prompts
              )
              #-----------------------------------------------------------------------------
              # Class
              #-----------------------------------------------------------------------------
              class TestStrings(TestsBase):
                  """Tests for the string filters imported from ``..strings``."""
                  def test_wrap_text(self):
                      """wrap_text test"""
                      test_text = """
                      Tush! never tell me; I take it much unkindly
                      That thou, Iago, who hast had my purse
                      As if the strings were thine, shouldst know of this.
                      """
                      # includes widths shorter than most words in the sample text
                      for length in [30,5,1]:
                          self._confirm_wrap_text(test_text, length)
                  def _confirm_wrap_text(self, text, length):
                      # helper: no line of the wrapped output may exceed `length` characters
                      for line in wrap_text(text, length).split('\n'):
                          assert len(line) <= length
                  def test_html2text(self):
                      """html2text test"""
                      #TODO: More tests
                      self.assertEqual(html2text('<name>joe</name>'), 'joe')
                  def test_add_anchor(self):
                      """add_anchor test"""
                      #TODO: More tests
                      results = add_anchor('<b>Hello World!</b>')
                      # the original tag and text must survive, with an id and anchor link added
                      assert 'Hello World!' in results
                      assert 'id="' in results
                      assert 'class="anchor-link"' in results
                      assert '<b' in results
                      assert '</b>' in results
+                 def test_add_anchor_fail(self):
+                     """add_anchor does nothing when it fails"""
+                     html = '<h1>Hello <br>World!</h1>'
+                     results = add_anchor(html)
+                     self.assertEqual(html, results)
                  def test_strip_dollars(self):
                      """strip_dollars test"""
                      # (input, expected) pairs: only leading/trailing '$' are expected to go
                      tests = [
                          ('', ''),
                          ('$$', ''),
                          ('$H$', 'H'),
                          ('$He', 'He'),
                          ('H$el', 'H$el'),
                          ('Hell$', 'Hell'),
                          ('Hello', 'Hello'),
                          ('W$o$rld', 'W$o$rld')]
                      for test in tests:
                          self._try_strip_dollars(test[0], test[1])
                  def _try_strip_dollars(self, test, result):
                      # helper: strip_dollars(test) must equal result
                      self.assertEqual(strip_dollars(test), result)
                  def test_strip_files_prefix(self):
                      """strip_files_prefix test"""
                      # (input, expected) pairs: only the href attribute value loses 'files/'
                      tests = [
                          ('', ''),
                          ('/files', '/files'),
                          ('test="/files"', 'test="/files"'),
                          ('My files are in `files/`', 'My files are in `files/`'),
                          ('<a href="files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>')]
                      for test in tests:
                          self._try_files_prefix(test[0], test[1])
                  def _try_files_prefix(self, test, result):
                      # helper: strip_files_prefix(test) must equal result
                      self.assertEqual(strip_files_prefix(test), result)
                  def test_comment_lines(self):
                      """comment_lines test"""
                      # default prefix first, then a custom one
                      for line in comment_lines('hello\nworld\n!').split('\n'):
                          assert line.startswith('# ')
                      for line in comment_lines('hello\nworld\n!', 'beep').split('\n'):
                          assert line.startswith('beep')
                  def test_get_lines(self):
                      """get_lines test"""
                      text = "hello\nworld\n!"
                      # start/end are exercised as slice bounds: open-ended, past-the-end, negative
                      self.assertEqual(get_lines(text, start=1), "world\n!")
                      self.assertEqual(get_lines(text, end=2), "hello\nworld")
                      self.assertEqual(get_lines(text, start=2, end=5), "!")
                      self.assertEqual(get_lines(text, start=-2), "world\n!")
                  def test_ipython2python(self):
                      """ipython2python test"""
                      #TODO: More tests
                      # the u'' prefixes are dropped from the result before comparing
                      results = ipython2python(u'%%pylab\nprint("Hello-World")').replace("u'", "'")
                      self.fuzzy_compare(results, u"get_ipython().run_cell_magic('pylab', '', 'print(\"Hello-World\")')",
                          ignore_spaces=True, ignore_newlines=True)
                  def test_posix_path(self):
                      """posix_path test"""
                      path_list = ['foo', 'bar']
                      expected = '/'.join(path_list)
                      # build the input with the platform's native separator
                      native = os.path.join(*path_list)
                      filtered = posix_path(native)
                      self.assertEqual(filtered, expected)
                  def test_add_prompts(self):
                      """add_prompts test"""
                      text1 = """for i in range(10):\n  i += 1\n  print i"""
                      text2 = """>>> for i in range(10):\n...   i += 1\n...   print i"""
                      self.assertEqual(text2, add_prompts(text1))

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages