##// END OF EJS Templates
Merge pull request #4696 from minrk/etree-fail...
Matthias Bussonnier -
r13884:730129d3 merge
parent child Browse files
Show More
@@ -1,207 +1,215 b''
1 # coding: utf-8
1 # coding: utf-8
2 """String filters.
2 """String filters.
3
3
4 Contains a collection of useful string manipulation filters for use in Jinja
4 Contains a collection of useful string manipulation filters for use in Jinja
5 templates.
5 templates.
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (c) 2013, the IPython Development Team.
8 # Copyright (c) 2013, the IPython Development Team.
9 #
9 #
10 # Distributed under the terms of the Modified BSD License.
10 # Distributed under the terms of the Modified BSD License.
11 #
11 #
12 # The full license is in the file COPYING.txt, distributed with this software.
12 # The full license is in the file COPYING.txt, distributed with this software.
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14
14
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16 # Imports
16 # Imports
17 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
18
18
19 import os
19 import os
20 import re
20 import re
21 import textwrap
21 import textwrap
22 try:
22 try:
23 from urllib.parse import quote # Py 3
23 from urllib.parse import quote # Py 3
24 except ImportError:
24 except ImportError:
25 from urllib2 import quote # Py 2
25 from urllib2 import quote # Py 2
26 from xml.etree import ElementTree
26 from xml.etree import ElementTree
27
27
28 from IPython.core.interactiveshell import InteractiveShell
28 from IPython.core.interactiveshell import InteractiveShell
29 from IPython.utils import py3compat
29 from IPython.utils import py3compat
30
30
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32 # Functions
32 # Functions
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34
34
35 __all__ = [
35 __all__ = [
36 'wrap_text',
36 'wrap_text',
37 'html2text',
37 'html2text',
38 'add_anchor',
38 'add_anchor',
39 'strip_dollars',
39 'strip_dollars',
40 'strip_files_prefix',
40 'strip_files_prefix',
41 'comment_lines',
41 'comment_lines',
42 'get_lines',
42 'get_lines',
43 'ipython2python',
43 'ipython2python',
44 'posix_path',
44 'posix_path',
45 'path2url',
45 'path2url',
46 'add_prompts'
46 'add_prompts'
47 ]
47 ]
48
48
49
49
50 def wrap_text(text, width=100):
50 def wrap_text(text, width=100):
51 """
51 """
52 Intelligently wrap text.
52 Intelligently wrap text.
53 Wrap text without breaking words if possible.
53 Wrap text without breaking words if possible.
54
54
55 Parameters
55 Parameters
56 ----------
56 ----------
57 text : str
57 text : str
58 Text to wrap.
58 Text to wrap.
59 width : int, optional
59 width : int, optional
60 Number of characters to wrap to, default 100.
60 Number of characters to wrap to, default 100.
61 """
61 """
62
62
63 split_text = text.split('\n')
63 split_text = text.split('\n')
64 wrp = map(lambda x:textwrap.wrap(x,width), split_text)
64 wrp = map(lambda x:textwrap.wrap(x,width), split_text)
65 wrpd = map('\n'.join, wrp)
65 wrpd = map('\n'.join, wrp)
66 return '\n'.join(wrpd)
66 return '\n'.join(wrpd)
67
67
68
68
69 def html2text(element):
69 def html2text(element):
70 """extract inner text from html
70 """extract inner text from html
71
71
72 Analog of jQuery's $(element).text()
72 Analog of jQuery's $(element).text()
73 """
73 """
74 if isinstance(element, py3compat.string_types):
74 if isinstance(element, py3compat.string_types):
75 element = ElementTree.fromstring(element)
75 try:
76 element = ElementTree.fromstring(element)
77 except Exception:
78 # failed to parse, just return it unmodified
79 return element
76
80
77 text = element.text or ""
81 text = element.text or ""
78 for child in element:
82 for child in element:
79 text += html2text(child)
83 text += html2text(child)
80 text += (element.tail or "")
84 text += (element.tail or "")
81 return text
85 return text
82
86
83
87
84 def add_anchor(html):
88 def add_anchor(html):
85 """Add an anchor-link to an html header tag
89 """Add an anchor-link to an html header tag
86
90
87 For use in heading cells
91 For use in heading cells
88 """
92 """
89 h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
93 try:
94 h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
95 except Exception:
96 # failed to parse, just return it unmodified
97 return html
90 link = html2text(h).replace(' ', '-')
98 link = html2text(h).replace(' ', '-')
91 h.set('id', link)
99 h.set('id', link)
92 a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
100 a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
92 a.text = u'¶'
100 a.text = u'¶'
94 h.append(a)
102 h.append(a)
95
103
96 # Known issue of Python3.x, ElementTree.tostring() returns a byte string
104 # Known issue of Python3.x, ElementTree.tostring() returns a byte string
97 # instead of a text string. See issue http://bugs.python.org/issue10942
105 # instead of a text string. See issue http://bugs.python.org/issue10942
98 # Workaround is to make sure the bytes are casted to a string.
106 # Workaround is to make sure the bytes are casted to a string.
99 return py3compat.decode(ElementTree.tostring(h), 'utf-8')
107 return py3compat.decode(ElementTree.tostring(h), 'utf-8')
100
108
101
109
102 def add_prompts(code, first='>>> ', cont='... '):
110 def add_prompts(code, first='>>> ', cont='... '):
103 """Add prompts to code snippets"""
111 """Add prompts to code snippets"""
104 new_code = []
112 new_code = []
105 code_list = code.split('\n')
113 code_list = code.split('\n')
106 new_code.append(first + code_list[0])
114 new_code.append(first + code_list[0])
107 for line in code_list[1:]:
115 for line in code_list[1:]:
108 new_code.append(cont + line)
116 new_code.append(cont + line)
109 return '\n'.join(new_code)
117 return '\n'.join(new_code)
110
118
111
119
112 def strip_dollars(text):
120 def strip_dollars(text):
113 """
121 """
114 Remove all dollar symbols from text
122 Remove all dollar symbols from text
115
123
116 Parameters
124 Parameters
117 ----------
125 ----------
118 text : str
126 text : str
119 Text to remove dollars from
127 Text to remove dollars from
120 """
128 """
121
129
122 return text.strip('$')
130 return text.strip('$')
123
131
124
132
125 files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
133 files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
126 markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')
134 markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')
127
135
128 def strip_files_prefix(text):
136 def strip_files_prefix(text):
129 """
137 """
130 Fix all fake URLs that start with `files/`, stripping out the `files/` prefix.
138 Fix all fake URLs that start with `files/`, stripping out the `files/` prefix.
131 Applies to both urls (for html) and relative paths (for markdown paths).
139 Applies to both urls (for html) and relative paths (for markdown paths).
132
140
133 Parameters
141 Parameters
134 ----------
142 ----------
135 text : str
143 text : str
136 Text in which to replace 'src="files/real...' with 'src="real...'
144 Text in which to replace 'src="files/real...' with 'src="real...'
137 """
145 """
138 cleaned_text = files_url_pattern.sub(r"\1=\2", text)
146 cleaned_text = files_url_pattern.sub(r"\1=\2", text)
139 cleaned_text = markdown_url_pattern.sub(r'\1[\2](\3)', cleaned_text)
147 cleaned_text = markdown_url_pattern.sub(r'\1[\2](\3)', cleaned_text)
140 return cleaned_text
148 return cleaned_text
141
149
142
150
143 def comment_lines(text, prefix='# '):
151 def comment_lines(text, prefix='# '):
144 """
152 """
145 Build a Python comment line from input text.
153 Build a Python comment line from input text.
146
154
147 Parameters
155 Parameters
148 ----------
156 ----------
149 text : str
157 text : str
150 Text to comment out.
158 Text to comment out.
151 prefix : str
159 prefix : str
152 Character to append to the start of each line.
160 Character to append to the start of each line.
153 """
161 """
154
162
155 #Replace line breaks with line breaks and comment symbols.
163 #Replace line breaks with line breaks and comment symbols.
156 #Also add a comment symbol at the beginning to comment out
164 #Also add a comment symbol at the beginning to comment out
157 #the first line.
165 #the first line.
158 return prefix + ('\n'+prefix).join(text.split('\n'))
166 return prefix + ('\n'+prefix).join(text.split('\n'))
159
167
160
168
161 def get_lines(text, start=None,end=None):
169 def get_lines(text, start=None,end=None):
162 """
170 """
163 Split the input text into separate lines and then return the
171 Split the input text into separate lines and then return the
164 lines that the caller is interested in.
172 lines that the caller is interested in.
165
173
166 Parameters
174 Parameters
167 ----------
175 ----------
168 text : str
176 text : str
169 Text to parse lines from.
177 Text to parse lines from.
170 start : int, optional
178 start : int, optional
171 First line to grab from.
179 First line to grab from.
172 end : int, optional
180 end : int, optional
173 Last line to grab from.
181 Last line to grab from.
174 """
182 """
175
183
176 # Split the input into lines.
184 # Split the input into lines.
177 lines = text.split("\n")
185 lines = text.split("\n")
178
186
179 # Return the right lines.
187 # Return the right lines.
180 return "\n".join(lines[start:end]) #re-join
188 return "\n".join(lines[start:end]) #re-join
181
189
182 def ipython2python(code):
190 def ipython2python(code):
183 """Transform IPython syntax to pure Python syntax
191 """Transform IPython syntax to pure Python syntax
184
192
185 Parameters
193 Parameters
186 ----------
194 ----------
187
195
188 code : str
196 code : str
189 IPython code, to be transformed to pure Python
197 IPython code, to be transformed to pure Python
190 """
198 """
191 shell = InteractiveShell.instance()
199 shell = InteractiveShell.instance()
192 return shell.input_transformer_manager.transform_cell(code)
200 return shell.input_transformer_manager.transform_cell(code)
193
201
194 def posix_path(path):
202 def posix_path(path):
195 """Turn a path into posix-style path/to/etc
203 """Turn a path into posix-style path/to/etc
196
204
197 Mainly for use in latex on Windows,
205 Mainly for use in latex on Windows,
198 where native Windows paths are not allowed.
206 where native Windows paths are not allowed.
199 """
207 """
200 if os.path.sep != '/':
208 if os.path.sep != '/':
201 return path.replace(os.path.sep, '/')
209 return path.replace(os.path.sep, '/')
202 return path
210 return path
203
211
204 def path2url(path):
212 def path2url(path):
205 """Turn a file path into a URL"""
213 """Turn a file path into a URL"""
206 parts = path.split(os.path.sep)
214 parts = path.split(os.path.sep)
207 return '/'.join(quote(part) for part in parts)
215 return '/'.join(quote(part) for part in parts)
@@ -1,148 +1,153 b''
1 """
1 """
2 Module with tests for Strings
2 Module with tests for Strings
3 """
3 """
4
4
5 #-----------------------------------------------------------------------------
5 #-----------------------------------------------------------------------------
6 # Copyright (c) 2013, the IPython Development Team.
6 # Copyright (c) 2013, the IPython Development Team.
7 #
7 #
8 # Distributed under the terms of the Modified BSD License.
8 # Distributed under the terms of the Modified BSD License.
9 #
9 #
10 # The full license is in the file COPYING.txt, distributed with this software.
10 # The full license is in the file COPYING.txt, distributed with this software.
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12
12
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 # Imports
14 # Imports
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16 import os
16 import os
17
17
18 from ...tests.base import TestsBase
18 from ...tests.base import TestsBase
19 from ..strings import (wrap_text, html2text, add_anchor, strip_dollars,
19 from ..strings import (wrap_text, html2text, add_anchor, strip_dollars,
20 strip_files_prefix, get_lines, comment_lines, ipython2python, posix_path,
20 strip_files_prefix, get_lines, comment_lines, ipython2python, posix_path,
21 add_prompts
21 add_prompts
22 )
22 )
23
23
24
24
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Class
26 # Class
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28
28
29 class TestStrings(TestsBase):
29 class TestStrings(TestsBase):
30
30
31 def test_wrap_text(self):
31 def test_wrap_text(self):
32 """wrap_text test"""
32 """wrap_text test"""
33 test_text = """
33 test_text = """
34 Tush! never tell me; I take it much unkindly
34 Tush! never tell me; I take it much unkindly
35 That thou, Iago, who hast had my purse
35 That thou, Iago, who hast had my purse
36 As if the strings were thine, shouldst know of this.
36 As if the strings were thine, shouldst know of this.
37 """
37 """
38 for length in [30,5,1]:
38 for length in [30,5,1]:
39 self._confirm_wrap_text(test_text, length)
39 self._confirm_wrap_text(test_text, length)
40
40
41
41
42 def _confirm_wrap_text(self, text, length):
42 def _confirm_wrap_text(self, text, length):
43 for line in wrap_text(text, length).split('\n'):
43 for line in wrap_text(text, length).split('\n'):
44 assert len(line) <= length
44 assert len(line) <= length
45
45
46
46
47 def test_html2text(self):
47 def test_html2text(self):
48 """html2text test"""
48 """html2text test"""
49 #TODO: More tests
49 #TODO: More tests
50 self.assertEqual(html2text('<name>joe</name>'), 'joe')
50 self.assertEqual(html2text('<name>joe</name>'), 'joe')
51
51
52
52
53 def test_add_anchor(self):
53 def test_add_anchor(self):
54 """add_anchor test"""
54 """add_anchor test"""
55 #TODO: More tests
55 #TODO: More tests
56 results = add_anchor('<b>Hello World!</b>')
56 results = add_anchor('<b>Hello World!</b>')
57 assert 'Hello World!' in results
57 assert 'Hello World!' in results
58 assert 'id="' in results
58 assert 'id="' in results
59 assert 'class="anchor-link"' in results
59 assert 'class="anchor-link"' in results
60 assert '<b' in results
60 assert '<b' in results
61 assert '</b>' in results
61 assert '</b>' in results
62
62
63
63 def test_add_anchor_fail(self):
64 """add_anchor does nothing when it fails"""
65 html = '<h1>Hello <br>World!</h1>'
66 results = add_anchor(html)
67 self.assertEqual(html, results)
68
64 def test_strip_dollars(self):
69 def test_strip_dollars(self):
65 """strip_dollars test"""
70 """strip_dollars test"""
66 tests = [
71 tests = [
67 ('', ''),
72 ('', ''),
68 ('$$', ''),
73 ('$$', ''),
69 ('$H$', 'H'),
74 ('$H$', 'H'),
70 ('$He', 'He'),
75 ('$He', 'He'),
71 ('H$el', 'H$el'),
76 ('H$el', 'H$el'),
72 ('Hell$', 'Hell'),
77 ('Hell$', 'Hell'),
73 ('Hello', 'Hello'),
78 ('Hello', 'Hello'),
74 ('W$o$rld', 'W$o$rld')]
79 ('W$o$rld', 'W$o$rld')]
75 for test in tests:
80 for test in tests:
76 self._try_strip_dollars(test[0], test[1])
81 self._try_strip_dollars(test[0], test[1])
77
82
78
83
79 def _try_strip_dollars(self, test, result):
84 def _try_strip_dollars(self, test, result):
80 self.assertEqual(strip_dollars(test), result)
85 self.assertEqual(strip_dollars(test), result)
81
86
82
87
83 def test_strip_files_prefix(self):
88 def test_strip_files_prefix(self):
84 """strip_files_prefix test"""
89 """strip_files_prefix test"""
85 tests = [
90 tests = [
86 ('', ''),
91 ('', ''),
87 ('/files', '/files'),
92 ('/files', '/files'),
88 ('test="/files"', 'test="/files"'),
93 ('test="/files"', 'test="/files"'),
89 ('My files are in `files/`', 'My files are in `files/`'),
94 ('My files are in `files/`', 'My files are in `files/`'),
90 ('<a href="files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>'),
95 ('<a href="files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>'),
91 ('<a href="/files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>'),
96 ('<a href="/files/test.html">files/test.html</a>', '<a href="test.html">files/test.html</a>'),
92 ("<a href='files/test.html'>files/test.html</a>", "<a href='test.html'>files/test.html</a>"),
97 ("<a href='files/test.html'>files/test.html</a>", "<a href='test.html'>files/test.html</a>"),
93 ('<img src="files/url/location.gif">', '<img src="url/location.gif">'),
98 ('<img src="files/url/location.gif">', '<img src="url/location.gif">'),
94 ('<img src="/files/url/location.gif">', '<img src="url/location.gif">'),
99 ('<img src="/files/url/location.gif">', '<img src="url/location.gif">'),
95 ('hello![caption]', 'hello![caption]'),
100 ('hello![caption]', 'hello![caption]'),
96 ('hello![caption](/url/location.gif)', 'hello![caption](/url/location.gif)'),
101 ('hello![caption](/url/location.gif)', 'hello![caption](/url/location.gif)'),
97 ('hello![caption](url/location.gif)', 'hello![caption](url/location.gif)'),
102 ('hello![caption](url/location.gif)', 'hello![caption](url/location.gif)'),
98 ('hello![caption](url/location.gif)', 'hello![caption](url/location.gif)'),
103 ('hello![caption](url/location.gif)', 'hello![caption](url/location.gif)'),
99 ('hello![caption](files/url/location.gif)', 'hello![caption](url/location.gif)'),
104 ('hello![caption](files/url/location.gif)', 'hello![caption](url/location.gif)'),
100 ('hello![caption](/files/url/location.gif)', 'hello![caption](url/location.gif)'),
105 ('hello![caption](/files/url/location.gif)', 'hello![caption](url/location.gif)'),
101 ('hello [text](/files/url/location.gif)', 'hello [text](url/location.gif)'),
106 ('hello [text](/files/url/location.gif)', 'hello [text](url/location.gif)'),
102 ('hello [text space](files/url/location.gif)', 'hello [text space](url/location.gif)'),
107 ('hello [text space](files/url/location.gif)', 'hello [text space](url/location.gif)'),
103 ]
108 ]
104 for test in tests:
109 for test in tests:
105 self._try_files_prefix(test[0], test[1])
110 self._try_files_prefix(test[0], test[1])
106
111
107
112
108 def _try_files_prefix(self, test, result):
113 def _try_files_prefix(self, test, result):
109 self.assertEqual(strip_files_prefix(test), result)
114 self.assertEqual(strip_files_prefix(test), result)
110
115
111
116
112 def test_comment_lines(self):
117 def test_comment_lines(self):
113 """comment_lines test"""
118 """comment_lines test"""
114 for line in comment_lines('hello\nworld\n!').split('\n'):
119 for line in comment_lines('hello\nworld\n!').split('\n'):
115 assert line.startswith('# ')
120 assert line.startswith('# ')
116 for line in comment_lines('hello\nworld\n!', 'beep').split('\n'):
121 for line in comment_lines('hello\nworld\n!', 'beep').split('\n'):
117 assert line.startswith('beep')
122 assert line.startswith('beep')
118
123
119
124
120 def test_get_lines(self):
125 def test_get_lines(self):
121 """get_lines test"""
126 """get_lines test"""
122 text = "hello\nworld\n!"
127 text = "hello\nworld\n!"
123 self.assertEqual(get_lines(text, start=1), "world\n!")
128 self.assertEqual(get_lines(text, start=1), "world\n!")
124 self.assertEqual(get_lines(text, end=2), "hello\nworld")
129 self.assertEqual(get_lines(text, end=2), "hello\nworld")
125 self.assertEqual(get_lines(text, start=2, end=5), "!")
130 self.assertEqual(get_lines(text, start=2, end=5), "!")
126 self.assertEqual(get_lines(text, start=-2), "world\n!")
131 self.assertEqual(get_lines(text, start=-2), "world\n!")
127
132
128
133
129 def test_ipython2python(self):
134 def test_ipython2python(self):
130 """ipython2python test"""
135 """ipython2python test"""
131 #TODO: More tests
136 #TODO: More tests
132 results = ipython2python(u'%%pylab\nprint("Hello-World")').replace("u'", "'")
137 results = ipython2python(u'%%pylab\nprint("Hello-World")').replace("u'", "'")
133 self.fuzzy_compare(results, u"get_ipython().run_cell_magic('pylab', '', 'print(\"Hello-World\")')",
138 self.fuzzy_compare(results, u"get_ipython().run_cell_magic('pylab', '', 'print(\"Hello-World\")')",
134 ignore_spaces=True, ignore_newlines=True)
139 ignore_spaces=True, ignore_newlines=True)
135
140
136 def test_posix_path(self):
141 def test_posix_path(self):
137 """posix_path test"""
142 """posix_path test"""
138 path_list = ['foo', 'bar']
143 path_list = ['foo', 'bar']
139 expected = '/'.join(path_list)
144 expected = '/'.join(path_list)
140 native = os.path.join(*path_list)
145 native = os.path.join(*path_list)
141 filtered = posix_path(native)
146 filtered = posix_path(native)
142 self.assertEqual(filtered, expected)
147 self.assertEqual(filtered, expected)
143
148
144 def test_add_prompts(self):
149 def test_add_prompts(self):
145 """add_prompts test"""
150 """add_prompts test"""
146 text1 = """for i in range(10):\n i += 1\n print i"""
151 text1 = """for i in range(10):\n i += 1\n print i"""
147 text2 = """>>> for i in range(10):\n... i += 1\n... print i"""
152 text2 = """>>> for i in range(10):\n... i += 1\n... print i"""
148 self.assertEqual(text2, add_prompts(text1))
153 self.assertEqual(text2, add_prompts(text1))
General Comments 0
You need to be logged in to leave comments. Login now