##// END OF EJS Templates
Backport PR #4092: nbconvert: Fix for unicode html headers, Windows + Python 2.x...
MinRK -
Show More
@@ -1,183 +1,183 b''
1 # coding: utf-8
1 # coding: utf-8
2 """String filters.
2 """String filters.
3
3
4 Contains a collection of useful string manipulation filters for use in Jinja
4 Contains a collection of useful string manipulation filters for use in Jinja
5 templates.
5 templates.
6 """
6 """
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8 # Copyright (c) 2013, the IPython Development Team.
8 # Copyright (c) 2013, the IPython Development Team.
9 #
9 #
10 # Distributed under the terms of the Modified BSD License.
10 # Distributed under the terms of the Modified BSD License.
11 #
11 #
12 # The full license is in the file COPYING.txt, distributed with this software.
12 # The full license is in the file COPYING.txt, distributed with this software.
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14
14
15 #-----------------------------------------------------------------------------
15 #-----------------------------------------------------------------------------
16 # Imports
16 # Imports
17 #-----------------------------------------------------------------------------
17 #-----------------------------------------------------------------------------
18
18
19 import os
19 import os
20 import re
20 import re
21 import textwrap
21 import textwrap
22 from xml.etree import ElementTree
22 from xml.etree import ElementTree
23
23
24 from IPython.core.interactiveshell import InteractiveShell
24 from IPython.core.interactiveshell import InteractiveShell
25 from IPython.utils import py3compat
25 from IPython.utils import py3compat
26
26
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28 # Functions
28 # Functions
29 #-----------------------------------------------------------------------------
29 #-----------------------------------------------------------------------------
30
30
# Public filter names exported by this module.
__all__ = [
    'wrap_text',
    'html2text',
    'add_anchor',
    'strip_dollars',
    'strip_files_prefix',
    'comment_lines',
    'get_lines',
    'ipython2python',
    'posix_path',
]
42
42
43
43
def wrap_text(text, width=100):
    """
    Intelligently wrap text.
    Wrap text without breaking words if possible.

    Existing line breaks are kept: the text is split on newlines, each
    line is wrapped on its own, and the pieces are joined back together.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.

    Returns
    -------
    str
        The wrapped text, with lines separated by newline characters.
    """
    wrapped_lines = (
        '\n'.join(textwrap.wrap(line, width))
        for line in text.split('\n')
    )
    return '\n'.join(wrapped_lines)
61
61
62
62
def html2text(element):
    """extract inner text from html

    Analog of jQuery's $(element).text()

    Parameters
    ----------
    element : str or ElementTree element
        Either markup, which is parsed with ``ElementTree.fromstring``,
        or an already parsed element.

    Returns
    -------
    The element's own text, followed by the text of every child
    (collected recursively), followed by the element's tail text.
    """
    if isinstance(element, py3compat.string_types):
        element = ElementTree.fromstring(element)

    # ``text`` and ``tail`` are None when absent, so fall back to "".
    parts = [element.text or ""]
    parts.extend(html2text(child) for child in element)
    parts.append(element.tail or "")
    return "".join(parts)
76
76
77
77
def add_anchor(html):
    """Add an anchor-link to an html header tag

    For use in heading cells

    Parameters
    ----------
    html : str
        Markup of a single header element; it is parsed with ElementTree.

    Returns
    -------
    The header markup with its ``id`` attribute set to the header text
    (spaces replaced by dashes) and an ``<a class="anchor-link">`` child
    pointing at that id appended.
    """
    # The encoding is passed explicitly: this is the backported fix for
    # unicode html headers on Windows + Python 2.x.
    h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
    link = html2text(h).replace(' ', '-')
    h.set('id', link)
    a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
    # Pilcrow sign, written as an escape so the literal cannot be
    # corrupted by a wrong source-file encoding.
    a.text = u'\u00b6'
    h.append(a)

    # Known issue of Python3.x, ElementTree.tostring() returns a byte string
    # instead of a text string.  See issue http://bugs.python.org/issue10942
    # Workaround is to make sure the bytes are casted to a string.
    return py3compat.decode(ElementTree.tostring(h), 'utf-8')
94
94
95
95
def strip_dollars(text):
    """
    Strip leading and trailing dollar symbols from text.

    Dollar symbols in the interior of the text are left untouched.

    Parameters
    ----------
    text : str
        Text to remove the surrounding dollars from

    Returns
    -------
    str
        The text without any leading or trailing ``$`` characters.
    """
    return text.strip('$')
107
107
108
108
# Matches a ``src`` or ``href`` attribute whose value starts with ``files/``.
# Group 1 is the attribute name, group 2 the optional opening quote.
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)files/')

def strip_files_prefix(text):
    """
    Fix all fake URLs that start with `files/`,
    stripping out the `files/` prefix.

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'

    Returns
    -------
    str
        The text with the `files/` prefix removed from every matching
        ``src`` and ``href`` attribute value.
    """
    # Re-emit the attribute name and opening quote, dropping ``files/``.
    return files_url_pattern.sub(r"\1=\2", text)
122
122
123
123
def comment_lines(text, prefix='# '):
    """
    Build a Python comment line from input text.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String to prepend to the start of each line, default '# '.

    Returns
    -------
    str
        The text with `prefix` in front of every line, including the
        first one and any empty lines.
    """
    return '\n'.join(prefix + line for line in text.split('\n'))
140
140
141
141
def get_lines(text, start=None,end=None):
    """
    Split the input text into separate lines and then return the
    lines that the caller is interested in.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Zero-based index of the first line to grab; defaults to the
        first line.
    end : int, optional
        Zero-based index of the line to stop before (exclusive, as in
        slicing); defaults to the end of the text.

    Returns
    -------
    str
        The selected lines, joined with newline characters.
    """
    selected = text.split("\n")[start:end]
    return "\n".join(selected)
162
162
def ipython2python(code):
    """Transform IPython syntax to pure Python syntax

    Parameters
    ----------

    code : str
        IPython code, to be transformed to pure Python

    Returns
    -------
    The result of running `code` through ``transform_cell`` of the
    input transformer manager of the ``InteractiveShell`` instance.
    """
    shell = InteractiveShell.instance()
    return shell.input_transformer_manager.transform_cell(code)
174
174
def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.

    Parameters
    ----------
    path : str
        Path using the separator of the current platform.

    Returns
    -------
    str
        The path with every ``os.path.sep`` replaced by '/'; returned
        unchanged when the platform separator already is '/'.
    """
    if os.path.sep != '/':
        return path.replace(os.path.sep, '/')
    return path
General Comments 0
You need to be logged in to leave comments. Login now