##// END OF EJS Templates
add ascii_only filter
MinRK -
Show More
@@ -1,322 +1,323 b''
1 1 """This module defines TemplateExporter, a highly configurable converter
2 2 that uses Jinja2 to export notebook files into different formats.
3 3 """
4 4
5 5 #-----------------------------------------------------------------------------
6 6 # Copyright (c) 2013, the IPython Development Team.
7 7 #
8 8 # Distributed under the terms of the Modified BSD License.
9 9 #
10 10 # The full license is in the file COPYING.txt, distributed with this software.
11 11 #-----------------------------------------------------------------------------
12 12
13 13 #-----------------------------------------------------------------------------
14 14 # Imports
15 15 #-----------------------------------------------------------------------------
16 16
17 17 from __future__ import print_function, absolute_import
18 18
19 19 # Stdlib imports
20 20 import os
21 21
22 22 # other libs/dependencies are imported at runtime
23 23 # to move ImportErrors to runtime when the requirement is actually needed
24 24
25 25 # IPython imports
26 26 from IPython.utils.traitlets import MetaHasTraits, Unicode, List, Dict, Any
27 27 from IPython.utils.importstring import import_item
28 28 from IPython.utils import py3compat, text
29 29
30 30 from IPython.nbformat.current import docstring_nbformat_mod
31 31 from IPython.nbconvert import filters
32 32 from .exporter import Exporter
33 33
34 34 #-----------------------------------------------------------------------------
35 35 # Globals and constants
36 36 #-----------------------------------------------------------------------------
37 37
#Jinja2 extensions to load.
JINJA_EXTENSIONS = ['jinja2.ext.loopcontrols']

# Filters made available to every template by default.  Keys are the names
# used inside Jinja templates; values are the callables (or filter classes)
# that TemplateExporter._init_filters passes to register_filter.
default_filters = {
        'indent': text.indent,
        'markdown2html': filters.markdown2html,
        'ansi2html': filters.ansi2html,
        'filter_data_type': filters.DataTypeFilter,
        'get_lines': filters.get_lines,
        'highlight2html': filters.Highlight2Html,
        'highlight2latex': filters.Highlight2Latex,
        'ipython2python': filters.ipython2python,
        'posix_path': filters.posix_path,
        'markdown2latex': filters.markdown2latex,
        'markdown2rst': filters.markdown2rst,
        'comment_lines': filters.comment_lines,
        'strip_ansi': filters.strip_ansi,
        'strip_dollars': filters.strip_dollars,
        'strip_files_prefix': filters.strip_files_prefix,
        'html2text' : filters.html2text,
        'add_anchor': filters.add_anchor,
        'ansi2latex': filters.ansi2latex,
        'wrap_text': filters.wrap_text,
        'escape_latex': filters.escape_latex,
        'citation2latex': filters.citation2latex,
        'path2url': filters.path2url,
        'add_prompts': filters.add_prompts,
        'ascii_only': filters.ascii_only,
}
66 67
67 68 #-----------------------------------------------------------------------------
68 69 # Class
69 70 #-----------------------------------------------------------------------------
70 71
class TemplateExporter(Exporter):
    """
    Exports notebooks into other file formats. Uses Jinja 2 templating engine
    to output new formats. Inherit from this class if you are creating a new
    template type along with new filters/preprocessors. If the filters/
    preprocessors provided by default suffice, there is no need to inherit from
    this class. Instead, override the template_file and file_extension
    traits via a config file.

    {filters}
    """

    # finish the docstring
    __doc__ = __doc__.format(filters = '- '+'\n - '.join(default_filters.keys()))


    template_file = Unicode(u'default',
            config=True,
            help="Name of the template file to use")
    def _template_file_changed(self, name, old, new):
        """Resolve the 'default' alias and reload the template."""
        if new == 'default':
            self.template_file = self.default_template
        else:
            self.template_file = new
        # Drop the cached template so _load_template actually reloads it.
        self.template = None
        self._load_template()

    # Template name substituted when template_file is left at 'default'.
    default_template = Unicode(u'')
    # Loaded Jinja template object; None until _load_template succeeds.
    template = Any()
    # Jinja environment; None until _init_environment has run.
    environment = Any()

    template_path = List(['.'], config=True)
    def _template_path_changed(self, name, old, new):
        """Attempt a template (re)load when the search path changes."""
        self._load_template()

    default_template_path = Unicode(
        os.path.join("..", "templates"),
        help="Path where the template files are located.")

    template_skeleton_path = Unicode(
        os.path.join("..", "templates", "skeleton"),
        help="Path where the template skeleton files are located.")

    #Jinja block definitions
    jinja_comment_block_start = Unicode("", config=True)
    jinja_comment_block_end = Unicode("", config=True)
    jinja_variable_block_start = Unicode("", config=True)
    jinja_variable_block_end = Unicode("", config=True)
    jinja_logic_block_start = Unicode("", config=True)
    jinja_logic_block_end = Unicode("", config=True)

    #Extension that the template files use.
    template_extension = Unicode(".tpl", config=True)

    filters = Dict(config=True,
        help="""Dictionary of filters, by name and namespace, to add to the Jinja
        environment.""")

    raw_mimetypes = List(config=True,
        help="""formats of raw cells to be included in this Exporter's output."""
    )
    def _raw_mimetypes_default(self):
        """Default to this exporter's output mimetype plus the empty string."""
        return [self.output_mimetype, '']


    def __init__(self, config=None, extra_loaders=None, **kw):
        """
        Public constructor

        Parameters
        ----------
        config : config
            User configuration instance.
        extra_loaders : list[of Jinja Loaders]
            ordered list of Jinja loader to find templates. Will be tried in order
            before the default FileSystem ones.
        template : str (optional, kw arg)
            Template to use when exporting.
        """
        super(TemplateExporter, self).__init__(config=config, **kw)

        #Init
        self._init_template()
        self._init_environment(extra_loaders=extra_loaders)
        self._init_preprocessors()
        self._init_filters()


    def _load_template(self):
        """Load the Jinja template object from the template file

        This is a no-op if the template attribute is already defined,
        or the Jinja environment is not setup yet.

        This is triggered by various trait changes that would change the template.
        """
        from jinja2 import TemplateNotFound

        if self.template is not None:
            return
        # called too early, do nothing
        if self.environment is None:
            return
        # Try different template names during conversion.  First try to load
        # the template by name with the extension added, then try the name
        # exactly as specified.
        try_names = []
        if self.template_file:
            try_names.extend([
                self.template_file + self.template_extension,
                self.template_file,
            ])
        for try_name in try_names:
            self.log.debug("Attempting to load template %s", try_name)
            try:
                self.template = self.environment.get_template(try_name)
            except (TemplateNotFound, IOError):
                # Not found under this name; fall through to the next candidate.
                pass
            except Exception:
                # Logger.warn is a deprecated alias; warning() is the
                # supported spelling.
                self.log.warning("Unexpected exception loading template: %s", try_name, exc_info=True)
            else:
                self.log.info("Loaded template %s", try_name)
                break

    @docstring_nbformat_mod
    def from_notebook_node(self, nb, resources=None, **kw):
        """
        Convert a notebook from a notebook node instance.

        Parameters
        ----------
        nb : :class:`~{nbformat_mod}.nbbase.NotebookNode`
            Notebook node
        resources : dict
            Additional resources that can be accessed read/write by
            preprocessors and filters.

        Returns
        -------
        (output, resources) : the rendered template output and the
            resources returned by the parent exporter (with 'raw_mimetypes'
            filled in if it was missing).

        Raises
        ------
        IOError
            If no template could be loaded for ``template_file``.
        """
        nb_copy, resources = super(TemplateExporter, self).from_notebook_node(nb, resources, **kw)
        resources.setdefault('raw_mimetypes', self.raw_mimetypes)

        self._load_template()

        if self.template is not None:
            output = self.template.render(nb=nb_copy, resources=resources)
        else:
            raise IOError('template file "%s" could not be found' % self.template_file)
        return output, resources


    def register_filter(self, name, jinja_filter):
        """
        Register a filter.
        A filter is a function that accepts and acts on one string.
        The filters are accessible within the Jinja templating engine.

        Parameters
        ----------
        name : str
            name to give the filter in the Jinja engine
        jinja_filter : str, callable or class
            An importable dotted name, an already-constructed callable, or a
            class that is instantiated before being registered.

        Returns
        -------
        The callable that was stored in the Jinja environment under ``name``.

        Raises
        ------
        TypeError
            If ``jinja_filter`` is None, or is an instance without a
            ``__call__`` attribute.
        """
        if jinja_filter is None:
            raise TypeError('filter')
        isclass = isinstance(jinja_filter, type)
        constructed = not isclass

        #Handle filter's registration based on its type
        if constructed and isinstance(jinja_filter, py3compat.string_types):
            #filter is a string, import the namespace and recursively call
            #this register_filter method
            filter_cls = import_item(jinja_filter)
            return self.register_filter(name, filter_cls)

        if constructed and hasattr(jinja_filter, '__call__'):
            #filter is a function, no need to construct it.
            self.environment.filters[name] = jinja_filter
            return jinja_filter

        elif isclass and isinstance(jinja_filter, MetaHasTraits):
            #filter is configurable.  Make sure to pass in new default for
            #the enabled flag if one was specified.
            filter_instance = jinja_filter(parent=self)
            # Return the registered instance, as the callable branch does.
            return self.register_filter(name, filter_instance)

        elif isclass:
            #filter is not configurable, construct it
            filter_instance = jinja_filter()
            return self.register_filter(name, filter_instance)

        else:
            #filter is an instance of something without a __call__
            #attribute.
            raise TypeError('filter')


    def _init_template(self):
        """
        Make sure a template name is specified.  If one isn't specified, try to
        build one from the information we know.
        """
        self._template_file_changed('template_file', self.template_file, self.template_file)


    def _init_environment(self, extra_loaders=None):
        """
        Create the Jinja templating environment.

        Parameters
        ----------
        extra_loaders : list of Jinja loaders, optional
            Loaders consulted, in order, before the file-system loader.
        """
        from jinja2 import Environment, ChoiceLoader, FileSystemLoader
        here = os.path.dirname(os.path.realpath(__file__))
        loaders = []
        if extra_loaders:
            loaders.extend(extra_loaders)

        # Work on a copy: extending self.template_path in place would leak
        # the built-in directories into the user-configurable trait (and
        # duplicate them if this method ever ran twice).
        paths = list(self.template_path)
        paths.extend([os.path.join(here, self.default_template_path),
                      os.path.join(here, self.template_skeleton_path)])
        loaders.append(FileSystemLoader(paths))

        self.environment = Environment(
            loader= ChoiceLoader(loaders),
            extensions=JINJA_EXTENSIONS
            )

        #Set special Jinja2 syntax that will not conflict with latex.
        if self.jinja_logic_block_start:
            self.environment.block_start_string = self.jinja_logic_block_start
        if self.jinja_logic_block_end:
            self.environment.block_end_string = self.jinja_logic_block_end
        if self.jinja_variable_block_start:
            self.environment.variable_start_string = self.jinja_variable_block_start
        if self.jinja_variable_block_end:
            self.environment.variable_end_string = self.jinja_variable_block_end
        if self.jinja_comment_block_start:
            self.environment.comment_start_string = self.jinja_comment_block_start
        if self.jinja_comment_block_end:
            self.environment.comment_end_string = self.jinja_comment_block_end


    def _init_filters(self):
        """
        Register all of the filters required for the exporter.
        """

        #Add default filters to the Jinja2 environment
        for key, value in default_filters.items():
            self.register_filter(key, value)

        #Load user filters.  Overwrite existing filters if need be.
        if self.filters:
            for key, user_filter in self.filters.items():
                self.register_filter(key, user_filter)
@@ -1,215 +1,221 b''
1 1 # coding: utf-8
2 2 """String filters.
3 3
4 4 Contains a collection of useful string manipulation filters for use in Jinja
5 5 templates.
6 6 """
7 7 #-----------------------------------------------------------------------------
8 8 # Copyright (c) 2013, the IPython Development Team.
9 9 #
10 10 # Distributed under the terms of the Modified BSD License.
11 11 #
12 12 # The full license is in the file COPYING.txt, distributed with this software.
13 13 #-----------------------------------------------------------------------------
14 14
15 15 #-----------------------------------------------------------------------------
16 16 # Imports
17 17 #-----------------------------------------------------------------------------
18 18
19 19 import os
20 20 import re
21 21 import textwrap
22 22 try:
23 23 from urllib.parse import quote # Py 3
24 24 except ImportError:
25 25 from urllib2 import quote # Py 2
26 26 from xml.etree import ElementTree
27 27
28 28 from IPython.core.interactiveshell import InteractiveShell
29 29 from IPython.utils import py3compat
30 30
31 31 #-----------------------------------------------------------------------------
32 32 # Functions
33 33 #-----------------------------------------------------------------------------
34 34
# Public names exported by this module.  Every entry ends with a comma so
# that adjacent string literals can never be merged by implicit
# concatenation when the list is extended.
__all__ = [
    'wrap_text',
    'html2text',
    'add_anchor',
    'strip_dollars',
    'strip_files_prefix',
    'comment_lines',
    'get_lines',
    'ipython2python',
    'posix_path',
    'path2url',
    'add_prompts',
    'ascii_only',
]
48 49
49 50
def wrap_text(text, width=100):
    """
    Wrap each line of text to a maximum width.

    Existing line breaks are kept; every individual line is passed through
    ``textwrap.wrap`` and the resulting pieces are joined with newlines.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.
    """
    wrapped_lines = []
    for line in text.split('\n'):
        # textwrap.wrap returns a list of pieces (empty for a blank line).
        wrapped_lines.append('\n'.join(textwrap.wrap(line, width)))
    return '\n'.join(wrapped_lines)
67 68
68 69
def html2text(element):
    """Return the inner text of an html element.

    Analog of jQuery's $(element).text().  A string argument is parsed with
    ElementTree first; if parsing fails the string is returned unmodified.
    """
    if isinstance(element, py3compat.string_types):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # failed to parse, just return it unmodified
            return element

    # Own text, then every child's text (recursively), then the tail.
    pieces = [element.text or ""]
    for child in element:
        pieces.append(html2text(child))
    pieces.append(element.tail or "")
    return "".join(pieces)
86 87
87 88
def add_anchor(html):
    """Add an anchor-link to an html header tag

    For use in heading cells.  The header's text, with spaces replaced by
    dashes, becomes both the element id and the link target.  If the input
    cannot be parsed it is returned unmodified.
    """
    try:
        h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
    except Exception:
        # failed to parse, just return it unmodified
        return html
    link = html2text(h).replace(' ', '-')
    h.set('id', link)
    a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
    # Pilcrow sign, written as an escape so the source file's encoding can
    # never garble it.
    a.text = u'\u00b6'
    h.append(a)

    # Known issue of Python3.x, ElementTree.tostring() returns a byte string
    # instead of a text string.  See issue http://bugs.python.org/issue10942
    # Workaround is to make sure the bytes are casted to a string.
    return py3compat.decode(ElementTree.tostring(h), 'utf-8')
108 109
109 110
def add_prompts(code, first='>>> ', cont='... '):
    """Prefix the first line of code with `first` and every other line with `cont`."""
    lines = code.split('\n')
    # str.split always yields at least one element, so lines[0] is safe.
    prompted = [first + lines[0]]
    prompted.extend(cont + line for line in lines[1:])
    return '\n'.join(prompted)
118 119
119 120
def strip_dollars(text):
    """
    Strip leading and trailing dollar symbols from text.

    Dollar signs in the interior of the text are left untouched.

    Parameters
    ----------
    text : str
        Text to remove the surrounding dollars from
    """
    stripped = text.strip('$')
    return stripped
131 132
132 133
# Matches html src/href attributes whose value starts with an optional
# slash followed by `files/`.
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# Matches markdown links/images whose target starts with an optional slash
# followed by `files/`.
markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')

def strip_files_prefix(text):
    """
    Remove the fake `files/` prefix from URLs and markdown paths.

    Handles html attributes (``src``/``href``) as well as markdown links
    and images.

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'
    """
    without_html_prefix = files_url_pattern.sub(r"\1=\2", text)
    return markdown_url_pattern.sub(r'\1[\2](\3)', without_html_prefix)
149 150
150 151
def comment_lines(text, prefix='# '):
    """
    Turn every line of the input text into a comment line.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String prepended to the start of each line.
    """
    # Prefix each line individually, including the first (and any empty) one.
    return '\n'.join(prefix + line for line in text.split('\n'))
167 168
168 169
def get_lines(text, start=None,end=None):
    """
    Return a slice of the lines of the input text, re-joined with newlines.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Index of the first line to keep (slice start).
    end : int, optional
        Index at which to stop; this line itself is excluded (slice end).
    """
    wanted = slice(start, end)
    return "\n".join(text.split("\n")[wanted])
189 190
def ipython2python(code):
    """Transform IPython syntax to pure Python syntax

    Runs the code through the input transformer manager of the shared
    InteractiveShell instance.

    Parameters
    ----------

    code : str
        IPython code, to be transformed to pure Python
    """
    transformer = InteractiveShell.instance().input_transformer_manager
    return transformer.transform_cell(code)
201 202
def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.
    """
    # Nothing to do when the platform separator is already a forward slash.
    return path if os.path.sep == '/' else path.replace(os.path.sep, '/')
211 212
def path2url(path):
    """Turn a file path into a URL

    Each path component is percent-quoted separately and the components
    are joined with forward slashes.
    """
    quoted_parts = map(quote, path.split(os.path.sep))
    return '/'.join(quoted_parts)
217
def ascii_only(s):
    """Return `s` as text containing only ascii characters.

    The value is cast to unicode, then round-tripped through the ascii
    codec using the 'replace' error handler.
    """
    as_unicode = py3compat.cast_unicode(s)
    ascii_bytes = as_unicode.encode('ascii', 'replace')
    return ascii_bytes.decode('ascii')
General Comments 0
You need to be logged in to leave comments. Login now