##// END OF EJS Templates
Give highlight_source a proper boolean default.
David Warde-Farley -
Show More
@@ -1,389 +1,389 b''
1 """Base classes for the notebook conversion pipeline.
1 """Base classes for the notebook conversion pipeline.
2
2
3 This module defines Converter, from which all objects designed to implement
3 This module defines Converter, from which all objects designed to implement
4 a conversion of IPython notebooks to some other format should inherit.
4 a conversion of IPython notebooks to some other format should inherit.
5 """
5 """
6 #-----------------------------------------------------------------------------
6 #-----------------------------------------------------------------------------
7 # Copyright (c) 2012, the IPython Development Team.
7 # Copyright (c) 2012, the IPython Development Team.
8 #
8 #
9 # Distributed under the terms of the Modified BSD License.
9 # Distributed under the terms of the Modified BSD License.
10 #
10 #
11 # The full license is in the file COPYING.txt, distributed with this software.
11 # The full license is in the file COPYING.txt, distributed with this software.
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13
13
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 # Imports
15 # Imports
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 from __future__ import print_function, absolute_import
18 from __future__ import print_function, absolute_import
19
19
20 # Stdlib imports
20 # Stdlib imports
21 import codecs
21 import codecs
22 import io
22 import io
23 import logging
23 import logging
24 import os
24 import os
25 import pprint
25 import pprint
26 import re
26 import re
27 from types import FunctionType
27 from types import FunctionType
28
28
29 # IPython imports
29 # IPython imports
30 from IPython.nbformat import current as nbformat
30 from IPython.nbformat import current as nbformat
31
31
32 # Our own imports
32 # Our own imports
33 from converters.utils import remove_fake_files_url
33 from converters.utils import remove_fake_files_url
34
34
35
35
36 #-----------------------------------------------------------------------------
36 #-----------------------------------------------------------------------------
37 # Local utilities
37 # Local utilities
38 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
39
39
def clean_filename(filename):
    """
    Remove non-alphanumeric characters from filenames.

    Every character that is not an ASCII letter, an ASCII digit or an
    underscore is replaced by a single underscore; the length of the
    string is therefore preserved.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        A sanitized filename that contains only alphanumeric
        characters and underscores.
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
57
57
58
58
59 #-----------------------------------------------------------------------------
59 #-----------------------------------------------------------------------------
60 # Class declarations
60 # Class declarations
61 #-----------------------------------------------------------------------------
61 #-----------------------------------------------------------------------------
62
62
class ConversionException(Exception):
    """Exception type declared by the notebook conversion module."""
65
65
66
66
class DocStringInheritor(type):
    """
    This metaclass will walk the list of bases until the desired
    superclass method is found AND if that method has a docstring and only
    THEN does it attach the superdocstring to the derived class method.

    A derived method that already carries its own docstring is left
    untouched; only undocumented overrides inherit the base docstring.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        """Build the class, copying base docstrings onto undocumented methods.

        Parameters
        ----------
        classname : str
            Name of the class being created.
        bases : tuple
            Direct base classes of the class being created.
        classDict : dict
            Namespace of the class body; plain functions found here are
            the candidates for docstring inheritance.
        """
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            # Only plain functions without a docstring of their own are
            # candidates; an explicit docstring on the override wins.
            if isinstance(attribute, FunctionType) and not attribute.__doc__:
                # look through bases for matching function by name
                for baseclass in bases:
                    if hasattr(baseclass, attributeName):
                        basefn = getattr(baseclass, attributeName)
                        if basefn.__doc__:
                            attribute.__doc__ = basefn.__doc__
                            break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)
95
95
96
96
class Converter(object):
    """Base class for notebook converters.

    The class reads a notebook file, dispatches every cell to a
    ``render_<cell_type>`` method and joins the returned lines into a
    single unicode string that is written next to the input file.
    Most ``render_*`` methods raise NotImplementedError here and are meant
    to be provided by subclasses.
    """
    __metaclass__ = DocStringInheritor
    default_encoding = 'utf-8'
    extension = str()
    figures_counter = 0
    infile = str()
    highlight_source = True
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    with_preamble = True
    user_preamble = None
    output = unicode()
    raw_as_verbatim = False
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']

    def __init__(self, infile, highlight_source=True):
        """Record the input path and derive the output locations from it.

        Parameters
        ----------
        infile : str
            Path of the notebook file to convert.
        highlight_source : bool
            Stored on the instance as ``highlight_source``.

        A ``<clean_name>_files`` directory is created beside the input
        file if it does not exist yet.
        """
        self.infile = infile
        self.highlight_source = highlight_source
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(infile_root)
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        """Remove the files directory again if nothing was written to it."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or ``blank_symbol`` if absent."""
        return cell.prompt_number if hasattr(cell, 'prompt_number') \
            else self.blank_symbol

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        return output_type dependent render method, for example
        render_display_format_text

        Falls back to ``render_unknown_display`` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementation for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\\n"

        Returns
        -------
        out : string
            Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Render every cell of every worksheet of ``self.nb``.

        Parameters
        ----------
        cell_separator : string
            String placed between the rendered cells.

        Returns
        -------
        cell_lines : list
            The rendered cells, joined with ``cell_separator`` and split
            back into individual lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        # Internal invariant: convert() must hand back unicode text.
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """write self.output to a file and return its absolute path

        Parameters
        ----------
        outfile : str, optional
            Destination path; defaults to ``outbase + '.' + extension``.
        encoding : str, optional
            Text encoding; defaults to ``default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named from ``clean_name`` and the running
        ``figures_counter`` and written into ``files_dir``.

        Returns the path of the written file (``files_dir`` joined with
        the figure name).
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The first format of ``display_data_priority`` present in
        ``output`` is used; otherwise the first key of ``output`` other
        than 'output_type'.

        Returns list.

        Raises RuntimeError if ``output`` carries no display data.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            # None of the preferred formats is present: take whatever
            # other key the output offers.
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ('png', 'svg', 'jpg', 'pdf'):
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and renders the pretty-printed cell through
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        ``type`` is not used; it is kept for backward compatibility and
        defaults to None because ``render_display_data`` calls the
        dispatched method with ``output`` only.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
General Comments 0
You need to be logged in to leave comments. Login now