upstream/ipython Commit - r13884:730129d3

Merge pull request from minrk/etree-fail...

Matthias Bussonnier -

r13884:730129d3

parent child

IPython/nbconvert/filters/strings.py

0 +10 -2

             # coding: utf-8
             """String filters.
             Contains a collection of useful string manipulation filters for use in Jinja
             templates.
             """
             #-----------------------------------------------------------------------------
             # Copyright (c) 2013, the IPython Development Team.
             #
             # Distributed under the terms of the Modified BSD License.
             #
             # The full license is in the file COPYING.txt, distributed with this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             import os
             import re
             import textwrap
             try:
                 from urllib.parse import quote  # Py 3
             except ImportError:
                 from urllib2 import quote  # Py 2
             from xml.etree import ElementTree
             from IPython.core.interactiveshell import InteractiveShell
             from IPython.utils import py3compat
             #-----------------------------------------------------------------------------
             # Functions
             #-----------------------------------------------------------------------------
             # Public names of this module: the ones picked up by
             # ``from ... import *``.
             __all__ = [
                 'wrap_text',
                 'html2text',
                 'add_anchor',
                 'strip_dollars',
                 'strip_files_prefix',
                 'comment_lines',
                 'get_lines',
                 'ipython2python',
                 'posix_path',
                 'path2url',
                 'add_prompts'
             ]
             def wrap_text(text, width=100):
                 """
                 Wrap every line of a text to a maximum width.
                 Each newline-separated line is wrapped on its own with
                 ``textwrap.wrap``, so line breaks already present are kept.
                 Parameters
                 ----------
                 text : str
                     Text to wrap.
                 width : int, optional
                     Maximum number of characters per output line, default 100.
                 Returns
                 -------
                 str
                     The wrapped lines, joined with newlines.
                 """
                 wrapped_lines = []
                 for source_line in text.split('\n'):
                     # textwrap.wrap gives a list of pieces (empty for a blank line).
                     pieces = textwrap.wrap(source_line, width)
                     wrapped_lines.append('\n'.join(pieces))
                 return '\n'.join(wrapped_lines)
             def html2text(element):
                 """extract inner text from html
                 Analog of jQuery's $(element).text()
                 Parameters
                 ----------
                 element : str or ElementTree element
                     A string is first parsed with ``ElementTree.fromstring``;
                     any other value is used as an already-parsed element.
                 Returns
                 -------
                 str
                     The element's own text, the text of all of its descendants
                     and its tail text, concatenated in document order.
                 """
                 if isinstance(element, py3compat.string_types):
-                    element = ElementTree.fromstring(element)
+                    try:
+                        element = ElementTree.fromstring(element)
+                    except Exception:
+                        # failed to parse, just return it unmodified
+                        return element
                 # ``text`` and ``tail`` may be None, hence the ``or ""`` fallbacks.
                 text = element.text or ""
                 # Recurse into the children; each call also contributes that
                 # child's tail text.
                 for child in element:
                     text += html2text(child)
                 text += (element.tail or "")
                 return text
             def add_anchor(html):
                 """Add an anchor-link to an html header tag
                 For use in heading cells
                 Parameters
                 ----------
                 html : str
                     Markup of a single element, parsed with
                     ``ElementTree.fromstring``.
                 Returns
                 -------
                 str
                     The serialized element, with an ``id`` attribute set and an
                     ``<a class="anchor-link">`` child appended that links to it.
                 """
-                h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
+                try:
+                    h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
+                except Exception:
+                    # failed to parse, just return it unmodified
+                    return html
                 # The anchor id is the element's inner text with spaces turned
                 # into dashes.
                 link = html2text(h).replace(' ', '-')
                 h.set('id', link)
                 a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
                 a.text = u'¶'
                 h.append(a)
                 # Known issue of Python3.x, ElementTree.tostring() returns a byte string
                 # instead of a text string.  See issue http://bugs.python.org/issue10942
                 # Workaround is to make sure the bytes are casted to a string.
                 return py3compat.decode(ElementTree.tostring(h), 'utf-8')
             def add_prompts(code, first='>>> ', cont='... '):
                 """Prefix a code snippet with interpreter-style prompts.
                 Parameters
                 ----------
                 code : str
                     Code snippet; lines are separated by newlines.
                 first : str, optional
                     Prompt put in front of the first line, default '>>> '.
                 cont : str, optional
                     Prompt put in front of every following line, default '... '.
                 Returns
                 -------
                 str
                     The snippet with a prompt in front of each line.
                 """
                 lines = code.split('\n')
                 # str.split always yields at least one item, so lines[0] exists.
                 prompted = [first + lines[0]]
                 prompted.extend(cont + rest for rest in lines[1:])
                 return '\n'.join(prompted)
             def strip_dollars(text):
                 """
                 Strip leading and trailing dollar symbols from text.
                 Dollar symbols in the middle of the text are left untouched.
                 Parameters
                 ----------
                 text : str
                     Text to strip the surrounding dollars from.
                 Returns
                 -------
                 str
                     The text without dollar symbols at either end.
                 """
                 without_leading = text.lstrip('$')
                 return without_leading.rstrip('$')
             # src=/href= attributes whose value starts with an optional slash and
             # "files/". Groups: attribute name, optional opening quote.
             files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
             # Markdown links or images ("[caption](files/...)", optionally preceded
             # by "!") whose target starts with an optional slash and "files/".
             markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')
             def strip_files_prefix(text):
                 """
                 Drop the fake `files/` prefix from URLs found in text.
                 Handles html `src`/`href` attributes as well as markdown links
                 and images.
                 Parameters
                 ----------
                 text : str
                     Text in which to replace 'src="files/real...' with 'src="real...'
                 Returns
                 -------
                 str
                     The text with the `files/` (or `/files/`) prefixes removed.
                 """
                 # First the html attributes, then the markdown targets.
                 html_cleaned = files_url_pattern.sub(r"\1=\2", text)
                 return markdown_url_pattern.sub(r'\1[\2](\3)', html_cleaned)
             def comment_lines(text, prefix='# '):
                 """
                 Comment out every line of the input text.
                 Parameters
                 ----------
                 text : str
                     Text to comment out.
                 prefix : str
                     String prepended to the start of each line, default '# '.
                 Returns
                 -------
                 str
                     The text with `prefix` in front of every line.
                 """
                 # Prefix each line individually and stitch the lines back together.
                 commented = [prefix + line for line in text.split('\n')]
                 return '\n'.join(commented)
             def get_lines(text, start=None,end=None):
                 """
                 Return a range of lines from the input text.
                 The text is split on newlines, sliced as ``lines[start:end]``
                 and joined back together.
                 Parameters
                 ----------
                 text : str
                     Text to parse lines from.
                 start : int, optional
                     Index of the first line to keep.
                 end : int, optional
                     Index at which to stop; the line at this index is not included.
                 """
                 wanted = slice(start, end)
                 return "\n".join(text.split("\n")[wanted])
             def ipython2python(code):
                 """Transform IPython syntax to pure Python syntax
                 Parameters
                 ----------
                 code : str
                     IPython code, to be transformed to pure Python
                 Returns
                 -------
                 The result of running `code` through the input transformer
                 manager of the ``InteractiveShell`` instance.
                 """
                 transformer = InteractiveShell.instance().input_transformer_manager
                 return transformer.transform_cell(code)
             def posix_path(path):
                 """Convert a native path into a posix-style path/to/etc
                 Mainly for use in latex on Windows,
                 where native Windows paths are not allowed.
                 The path is returned unchanged when the platform separator
                 is already '/'.
                 """
                 native_sep = os.path.sep
                 if native_sep == '/':
                     return path
                 return path.replace(native_sep, '/')
             def path2url(path):
                 """Turn a file path into a URL
                 The path is split on the platform separator, each component is
                 percent-quoted, and the components are joined with '/'.
                 """
                 components = path.split(os.path.sep)
                 quoted = [quote(component) for component in components]
                 return '/'.join(quoted)

IPython/nbconvert/filters/tests/test_strings.py

0 +6 -1

             """
             Module with tests for Strings
             """
             #-----------------------------------------------------------------------------
             # Copyright (c) 2013, the IPython Development Team.
             #
             # Distributed under the terms of the Modified BSD License.
             #
             # The full license is in the file COPYING.txt, distributed with this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             import os
             from ...tests.base import TestsBase
             from ..strings import (wrap_text, html2text, add_anchor, strip_dollars,
                 strip_files_prefix, get_lines, comment_lines, ipython2python, posix_path,
                 add_prompts
             )
             #-----------------------------------------------------------------------------
             # Class
             #-----------------------------------------------------------------------------
             class TestStrings(TestsBase):
                 """Tests for the string filters imported from ``..strings``."""
                 def test_wrap_text(self):
                     """wrap_text test"""
                     test_text = """
                     Tush! never tell me; I take it much unkindly
                     That thou, Iago, who hast had my purse
                     As if the strings were thine, shouldst know of this.
                     """
                     # Wrap at several widths, down to a single character.
                     for length in [30,5,1]:
                         self._confirm_wrap_text(test_text, length)
                 def _confirm_wrap_text(self, text, length):
                     """Check that no line of the wrapped text is longer than `length`."""
                     for line in wrap_text(text, length).split('\n'):
                         assert len(line) <= length
                 def test_html2text(self):
                     """html2text test"""
                     #TODO: More tests
                     self.assertEqual(html2text('<name>joe</name>'), 'joe')
                 def test_add_anchor(self):
                     """add_anchor test"""
                     #TODO: More tests
                     results = add_anchor('<b>Hello World!</b>')
                     # The original tag and text survive, plus the id and anchor link.
                     assert 'Hello World!' in results
                     assert 'id="' in results
                     assert 'class="anchor-link"' in results
                     assert '<b' in results
                     assert '</b>' in results
+                def test_add_anchor_fail(self):
+                    """add_anchor does nothing when it fails"""
+                    html = '<h1>Hello <br>World!</h1>'
+                    results = add_anchor(html)
+                    self.assertEqual(html, results)
                 def test_strip_dollars(self):
                     """strip_dollars test"""
                     # (input, expected) pairs: only dollars at either end are removed.
                     tests = [
                         ('', ''),
                         ('$$', ''),
                         ('$H$', 'H'),
                         ('$He', 'He'),
                         ('H$el', 'H$el'),
                         ('Hell$', 'Hell'),
                         ('Hello', 'Hello'),
                         ('W$o$rld', 'W$o$rld')]
                     for test in tests:
                         self._try_strip_dollars(test[0], test[1])
                 def _try_strip_dollars(self, test, result):
                     """Check that strip_dollars(test) equals `result`."""
                     self.assertEqual(strip_dollars(test), result)
                 def test_strip_files_prefix(self):
                     """strip_files_prefix test"""
                     # (input, expected) pairs covering html attributes and markdown
                     # links/images, with and without the `files/` prefix.
                     tests = [
                         ('', ''),
                         ('/files', '/files'),
                         ('test="/files"', 'test="/files"'),
                         ('My files are in `files/`', 'My files are in `files/`'),
                         ('<a href="files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>'),
                         ('<a href="/files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>'),
                         ("<a href='files/test.html'>files/test.html</a>", "<a href='test.html'>files/test.html</a>"),
                         ('<img src="files/url/location.gif">', '<img src="url/location.gif">'),
                         ('<img src="/files/url/location.gif">', '<img src="url/location.gif">'),
                         ('hello![caption]', 'hello![caption]'),
                         ('hello![caption](/url/location.gif)', 'hello![caption](/url/location.gif)'),
                         ('hello![caption](url/location.gif)', 'hello![caption](url/location.gif)'),
                         # NOTE(review): the next case is identical to the previous one.
                         ('hello![caption](url/location.gif)', 'hello![caption](url/location.gif)'),
                         ('hello![caption](files/url/location.gif)', 'hello![caption](url/location.gif)'),
                         ('hello![caption](/files/url/location.gif)', 'hello![caption](url/location.gif)'),
                         ('hello [text](/files/url/location.gif)', 'hello [text](url/location.gif)'),
                         ('hello [text space](files/url/location.gif)', 'hello [text space](url/location.gif)'),
                     ]
                     for test in tests:
                         self._try_files_prefix(test[0], test[1])
                 def _try_files_prefix(self, test, result):
                     """Check that strip_files_prefix(test) equals `result`."""
                     self.assertEqual(strip_files_prefix(test), result)
                 def test_comment_lines(self):
                     """comment_lines test"""
                     # Default prefix, then a custom one; every line must carry it.
                     for line in comment_lines('hello\nworld\n!').split('\n'):
                         assert line.startswith('# ')
                     for line in comment_lines('hello\nworld\n!', 'beep').split('\n'):
                         assert line.startswith('beep')
                 def test_get_lines(self):
                     """get_lines test"""
                     text = "hello\nworld\n!"
                     # `end` is exclusive; out-of-range and negative bounds are accepted.
                     self.assertEqual(get_lines(text, start=1), "world\n!")
                     self.assertEqual(get_lines(text, end=2), "hello\nworld")
                     self.assertEqual(get_lines(text, start=2, end=5), "!")
                     self.assertEqual(get_lines(text, start=-2), "world\n!")
                 def test_ipython2python(self):
                     """ipython2python test"""
                     #TODO: More tests
                     # The "u'" -> "'" replacement presumably normalises unicode
                     # literal prefixes in the generated code -- TODO confirm.
                     results = ipython2python(u'%%pylab\nprint("Hello-World")').replace("u'", "'")
                     self.fuzzy_compare(results, u"get_ipython().run_cell_magic('pylab', '', 'print(\"Hello-World\")')",
                         ignore_spaces=True, ignore_newlines=True)
                 def test_posix_path(self):
                     """posix_path test"""
                     path_list = ['foo', 'bar']
                     expected = '/'.join(path_list)
                     # Build the input with the platform's own separator.
                     native = os.path.join(*path_list)
                     filtered = posix_path(native)
                     self.assertEqual(filtered, expected)
                 def test_add_prompts(self):
                     """add_prompts test"""
                     text1 = """for i in range(10):\n  i += 1\n  print i"""
                     text2 = """>>> for i in range(10):\n...   i += 1\n...   print i"""
                     self.assertEqual(text2, add_prompts(text1))

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages