##// END OF EJS Templates
Clearer/simpler code suggested by @fperez....
David Warde-Farley -
Show More
@@ -1,357 +1,355 b''
1 from __future__ import print_function, absolute_import
1 from __future__ import print_function, absolute_import
2 from converters.utils import remove_fake_files_url
2 from converters.utils import remove_fake_files_url
3
3
4 # Stdlib
4 # Stdlib
5 import codecs
5 import codecs
6 import io
6 import io
7 import logging
7 import logging
8 import os
8 import os
9 import pprint
9 import pprint
10 import re
10 import re
11 from types import FunctionType
11 from types import FunctionType
12
12
13 # From IPython
13 # From IPython
14 from IPython.nbformat import current as nbformat
14 from IPython.nbformat import current as nbformat
15
15
16 # local
16 # local
17
17
18 def clean_filename(filename):
18 def clean_filename(filename):
19 """
19 """
20 Remove non-alphanumeric characters from filenames.
20 Remove non-alphanumeric characters from filenames.
21
21
22 Parameters
22 Parameters
23 ----------
23 ----------
24 filename : str
24 filename : str
25 The filename to be sanitized.
25 The filename to be sanitized.
26
26
27 Returns
27 Returns
28 -------
28 -------
29 clean : str
29 clean : str
30 A sanitized filename that contains only alphanumeric
30 A sanitized filename that contains only alphanumeric
31 characters and underscores.
31 characters and underscores.
32 """
32 """
33 filename = re.sub(r'[^a-zA-Z0-9_]', '_', filename)
33 filename = re.sub(r'[^a-zA-Z0-9_]', '_', filename)
34 return filename
34 return filename
35
35
36 #-----------------------------------------------------------------------------
36 #-----------------------------------------------------------------------------
37 # Class declarations
37 # Class declarations
38 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
39
39
40 class ConversionException(Exception):
40 class ConversionException(Exception):
41 pass
41 pass
42
42
43 class DocStringInheritor(type):
43 class DocStringInheritor(type):
44 """
44 """
45 This metaclass will walk the list of bases until the desired
45 This metaclass will walk the list of bases until the desired
46 superclass method is found AND if that method has a docstring and only
46 superclass method is found AND if that method has a docstring and only
47 THEN does it attach the superdocstring to the derived class method.
47 THEN does it attach the superdocstring to the derived class method.
48
48
49 Please use carefully, I just did the metaclass thing by following
49 Please use carefully, I just did the metaclass thing by following
50 Michael Foord's Metaclass tutorial
50 Michael Foord's Metaclass tutorial
51 (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
51 (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
52 have missed a step or two.
52 have missed a step or two.
53
53
54 source:
54 source:
55 http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
55 http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
56 by Paul McGuire
56 by Paul McGuire
57 """
57 """
58 def __new__(meta, classname, bases, classDict):
58 def __new__(meta, classname, bases, classDict):
59 newClassDict = {}
59 newClassDict = {}
60 for attributeName, attribute in classDict.items():
60 for attributeName, attribute in classDict.items():
61 if type(attribute) == FunctionType:
61 if type(attribute) == FunctionType:
62 # look through bases for matching function by name
62 # look through bases for matching function by name
63 for baseclass in bases:
63 for baseclass in bases:
64 if hasattr(baseclass, attributeName):
64 if hasattr(baseclass, attributeName):
65 basefn = getattr(baseclass, attributeName)
65 basefn = getattr(baseclass, attributeName)
66 if basefn.__doc__:
66 if basefn.__doc__:
67 attribute.__doc__ = basefn.__doc__
67 attribute.__doc__ = basefn.__doc__
68 break
68 break
69 newClassDict[attributeName] = attribute
69 newClassDict[attributeName] = attribute
70 return type.__new__(meta, classname, bases, newClassDict)
70 return type.__new__(meta, classname, bases, newClassDict)
71
71
72 class Converter(object):
72 class Converter(object):
73 __metaclass__ = DocStringInheritor
73 __metaclass__ = DocStringInheritor
74 default_encoding = 'utf-8'
74 default_encoding = 'utf-8'
75 extension = str()
75 extension = str()
76 figures_counter = 0
76 figures_counter = 0
77 infile = str()
77 infile = str()
78 infile_dir = str()
78 infile_dir = str()
79 infile_root = str()
79 infile_root = str()
80 files_dir = str()
80 files_dir = str()
81 with_preamble = True
81 with_preamble = True
82 user_preamble = None
82 user_preamble = None
83 output = unicode()
83 output = unicode()
84 raw_as_verbatim = False
84 raw_as_verbatim = False
85 # Which display data format is best? Subclasses can override if
85 # Which display data format is best? Subclasses can override if
86 # they have specific requirements.
86 # they have specific requirements.
87 display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
87 display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
88
88
89 def __init__(self, infile):
89 def __init__(self, infile):
90 self.infile = infile
90 self.infile = infile
91 self.infile_dir, infile_root = os.path.split(infile)
91 self.infile_dir, infile_root = os.path.split(infile)
92 infile_root = os.path.splitext(infile_root)[0]
92 infile_root = os.path.splitext(infile_root)[0]
93 self.clean_name = clean_filename(infile_root)
93 self.clean_name = clean_filename(infile_root)
94 files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
94 files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
95 if not os.path.isdir(files_dir):
95 if not os.path.isdir(files_dir):
96 os.mkdir(files_dir)
96 os.mkdir(files_dir)
97 self.infile_root = infile_root
97 self.infile_root = infile_root
98 self.files_dir = files_dir
98 self.files_dir = files_dir
99 self.outbase = os.path.join(self.infile_dir, infile_root)
99 self.outbase = os.path.join(self.infile_dir, infile_root)
100
100
101 def __del__(self):
101 def __del__(self):
102 if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
102 if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
103 os.rmdir(self.files_dir)
103 os.rmdir(self.files_dir)
104
104
105 def dispatch(self, cell_type):
105 def dispatch(self, cell_type):
106 """return cell_type dependent render method, for example render_code
106 """return cell_type dependent render method, for example render_code
107 """
107 """
108 return getattr(self, 'render_' + cell_type, self.render_unknown)
108 return getattr(self, 'render_' + cell_type, self.render_unknown)
109
109
110 def dispatch_display_format(self, format):
110 def dispatch_display_format(self, format):
111 """return format dependent render method, for example render_display_format_text
111 """return format dependent render method, for example render_display_format_text
112 """
112 """
113 return getattr(self, 'render_display_format_' + format, self.render_unknown_display)
113 return getattr(self, 'render_display_format_' + format, self.render_unknown_display)
114
114
115 def convert(self, cell_separator='\n'):
115 def convert(self, cell_separator='\n'):
116 """
116 """
117 Generic method to convert a notebook to a string representation.
117 Generic method to convert a notebook to a string representation.
118
118
119 This is accomplished by dispatching on the cell_type, so subclasses of
119 This is accomplished by dispatching on the cell_type, so subclasses of
120 Converter class do not need to re-implement this method, but just
120 Converter class do not need to re-implement this method, but just
121 need implementation for the methods that will be dispatched.
121 need implementation for the methods that will be dispatched.
122
122
123 Parameters
123 Parameters
124 ----------
124 ----------
125 cell_separator : string
125 cell_separator : string
126 Character or string to join cells with. Default is "\n"
126 Character or string to join cells with. Default is "\n"
127
127
128 Returns
128 Returns
129 -------
129 -------
130 out : string
130 out : string
131 """
131 """
132 lines = []
132 lines = []
133 lines.extend(self.optional_header())
133 lines.extend(self.optional_header())
134 lines.extend(self.main_body(cell_separator))
134 lines.extend(self.main_body(cell_separator))
135 lines.extend(self.optional_footer())
135 lines.extend(self.optional_footer())
136 return u'\n'.join(lines)
136 return u'\n'.join(lines)
137
137
138 def main_body(self, cell_separator='\n'):
138 def main_body(self, cell_separator='\n'):
139 converted_cells = []
139 converted_cells = []
140 for worksheet in self.nb.worksheets:
140 for worksheet in self.nb.worksheets:
141 for cell in worksheet.cells:
141 for cell in worksheet.cells:
142 #print(cell.cell_type) # dbg
142 #print(cell.cell_type) # dbg
143 conv_fn = self.dispatch(cell.cell_type)
143 conv_fn = self.dispatch(cell.cell_type)
144 if cell.cell_type in ('markdown', 'raw'):
144 if cell.cell_type in ('markdown', 'raw'):
145 remove_fake_files_url(cell)
145 remove_fake_files_url(cell)
146 converted_cells.append('\n'.join(conv_fn(cell)))
146 converted_cells.append('\n'.join(conv_fn(cell)))
147 cell_lines = cell_separator.join(converted_cells).split('\n')
147 cell_lines = cell_separator.join(converted_cells).split('\n')
148 return cell_lines
148 return cell_lines
149
149
150 def render(self):
150 def render(self):
151 "read, convert, and save self.infile"
151 "read, convert, and save self.infile"
152 if not hasattr(self, 'nb'):
152 if not hasattr(self, 'nb'):
153 self.read()
153 self.read()
154 self.output = self.convert()
154 self.output = self.convert()
155 assert(type(self.output) == unicode)
155 assert(type(self.output) == unicode)
156 return self.save()
156 return self.save()
157
157
158 def read(self):
158 def read(self):
159 "read and parse notebook into NotebookNode called self.nb"
159 "read and parse notebook into NotebookNode called self.nb"
160 with open(self.infile) as f:
160 with open(self.infile) as f:
161 self.nb = nbformat.read(f, 'json')
161 self.nb = nbformat.read(f, 'json')
162
162
163 def save(self, outfile=None, encoding=None):
163 def save(self, outfile=None, encoding=None):
164 "write self.output to outfile and return its absolute path"
164 "write self.output to outfile and return its absolute path"
165 if outfile is None:
165 if outfile is None:
166 outfile = self.outbase + '.' + self.extension
166 outfile = self.outbase + '.' + self.extension
167 if encoding is None:
167 if encoding is None:
168 encoding = self.default_encoding
168 encoding = self.default_encoding
169 with io.open(outfile, 'w', encoding=encoding) as f:
169 with io.open(outfile, 'w', encoding=encoding) as f:
170 f.write(self.output)
170 f.write(self.output)
171 return os.path.abspath(outfile)
171 return os.path.abspath(outfile)
172
172
173 def optional_header(self):
173 def optional_header(self):
174 """
174 """
175 Optional header to insert at the top of the converted notebook
175 Optional header to insert at the top of the converted notebook
176
176
177 Returns a list
177 Returns a list
178 """
178 """
179 return []
179 return []
180
180
181 def optional_footer(self):
181 def optional_footer(self):
182 """
182 """
183 Optional footer to insert at the end of the converted notebook
183 Optional footer to insert at the end of the converted notebook
184
184
185 Returns a list
185 Returns a list
186 """
186 """
187 return []
187 return []
188
188
189 def _new_figure(self, data, fmt):
189 def _new_figure(self, data, fmt):
190 """Create a new figure file in the given format.
190 """Create a new figure file in the given format.
191
191
192 Returns a path relative to the input file.
192 Returns a path relative to the input file.
193 """
193 """
194 figname = '%s_fig_%02i.%s' % (self.clean_name,
194 figname = '%s_fig_%02i.%s' % (self.clean_name,
195 self.figures_counter, fmt)
195 self.figures_counter, fmt)
196 self.figures_counter += 1
196 self.figures_counter += 1
197 fullname = os.path.join(self.files_dir, figname)
197 fullname = os.path.join(self.files_dir, figname)
198
198
199 # Binary files are base64-encoded, SVG is already XML
199 # Binary files are base64-encoded, SVG is already XML
200 if fmt in ('png', 'jpg', 'pdf'):
200 if fmt in ('png', 'jpg', 'pdf'):
201 data = data.decode('base64')
201 data = data.decode('base64')
202 fopen = lambda fname: open(fname, 'wb')
202 fopen = lambda fname: open(fname, 'wb')
203 else:
203 else:
204 fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
204 fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
205
205
206 with fopen(fullname) as f:
206 with fopen(fullname) as f:
207 f.write(data)
207 f.write(data)
208
208
209 return fullname
209 return fullname
210
210
211 def render_heading(self, cell):
211 def render_heading(self, cell):
212 """convert a heading cell
212 """convert a heading cell
213
213
214 Returns list."""
214 Returns list."""
215 raise NotImplementedError
215 raise NotImplementedError
216
216
217 def render_code(self, cell):
217 def render_code(self, cell):
218 """Convert a code cell
218 """Convert a code cell
219
219
220 Returns list."""
220 Returns list."""
221 raise NotImplementedError
221 raise NotImplementedError
222
222
223 def render_markdown(self, cell):
223 def render_markdown(self, cell):
224 """convert a markdown cell
224 """convert a markdown cell
225
225
226 Returns list."""
226 Returns list."""
227 raise NotImplementedError
227 raise NotImplementedError
228
228
229 def _img_lines(self, img_file):
229 def _img_lines(self, img_file):
230 """Return list of lines to include an image file."""
230 """Return list of lines to include an image file."""
231 # Note: subclasses may choose to implement format-specific _FMT_lines
231 # Note: subclasses may choose to implement format-specific _FMT_lines
232 # methods if they so choose (FMT in {png, svg, jpg, pdf}).
232 # methods if they so choose (FMT in {png, svg, jpg, pdf}).
233 raise NotImplementedError
233 raise NotImplementedError
234
234
235 def render_display_data(self, output):
235 def render_display_data(self, output):
236 """convert display data from the output of a code cell
236 """convert display data from the output of a code cell
237
237
238 Returns list.
238 Returns list.
239 """
239 """
240 # Choose preferred format if available
240 for fmt in self.display_data_priority:
241 preferred = [xx for xx in self.display_data_priority if xx in output]
241 if fmt in output:
242 if preferred:
242 break
243 fmt = preferred[0]
244 else:
243 else:
245 # Choose a format randomly if preference can't be satisfied
244 for fmt in output:
246 available = [k for k in output.keys() if k != 'output_type']
245 if fmt != 'output_type':
247 if available:
246 break
248 fmt = available[0]
249 else:
247 else:
250 raise RuntimeError('no display data')
248 raise RuntimeError('no display data')
251
249
252 # Is it an image?
250 # Is it an image?
253 if fmt in ['png', 'svg', 'jpg', 'pdf']:
251 if fmt in ['png', 'svg', 'jpg', 'pdf']:
254 img_file = self._new_figure(output[fmt], fmt)
252 img_file = self._new_figure(output[fmt], fmt)
255 # Subclasses can have format-specific render functions (e.g.,
253 # Subclasses can have format-specific render functions (e.g.,
256 # latex has to auto-convert all SVG to PDF first).
254 # latex has to auto-convert all SVG to PDF first).
257 lines_fun = getattr(self, '_%s_lines' % fmt, None)
255 lines_fun = getattr(self, '_%s_lines' % fmt, None)
258 if not lines_fun:
256 if not lines_fun:
259 lines_fun = self._img_lines
257 lines_fun = self._img_lines
260 lines = lines_fun(img_file)
258 lines = lines_fun(img_file)
261 else:
259 else:
262 lines_fun = self.dispatch_display_format(fmt)
260 lines_fun = self.dispatch_display_format(fmt)
263 lines = lines_fun(output)
261 lines = lines_fun(output)
264
262
265 return lines
263 return lines
266
264
267 def render_raw(self, cell):
265 def render_raw(self, cell):
268 """convert a cell with raw text
266 """convert a cell with raw text
269
267
270 Returns list."""
268 Returns list."""
271 raise NotImplementedError
269 raise NotImplementedError
272
270
273 def render_unknown(self, cell):
271 def render_unknown(self, cell):
274 """Render cells of unknown type
272 """Render cells of unknown type
275
273
276 Returns list."""
274 Returns list."""
277 data = pprint.pformat(cell)
275 data = pprint.pformat(cell)
278 logging.warning('Unknown cell: %s' % cell.cell_type)
276 logging.warning('Unknown cell: %s' % cell.cell_type)
279 return self._unknown_lines(data)
277 return self._unknown_lines(data)
280
278
281 def render_unknown_display(self, output, type):
279 def render_unknown_display(self, output, type):
282 """Render outputs of unknown type
280 """Render outputs of unknown type
283
281
284 Returns list."""
282 Returns list."""
285 data = pprint.pformat(output)
283 data = pprint.pformat(output)
286 logging.warning('Unknown output: %s' % output.output_type)
284 logging.warning('Unknown output: %s' % output.output_type)
287 return self._unknown_lines(data)
285 return self._unknown_lines(data)
288
286
289 def render_stream(self, output):
287 def render_stream(self, output):
290 """render the stream part of an output
288 """render the stream part of an output
291
289
292 Returns list.
290 Returns list.
293
291
294 Identical to render_display_format_text
292 Identical to render_display_format_text
295 """
293 """
296 return self.render_display_format_text(output)
294 return self.render_display_format_text(output)
297
295
298 def render_pyout(self, output):
296 def render_pyout(self, output):
299 """convert pyout part of a code cell
297 """convert pyout part of a code cell
300
298
301 Returns list."""
299 Returns list."""
302 raise NotImplementedError
300 raise NotImplementedError
303
301
304
302
305 def render_pyerr(self, output):
303 def render_pyerr(self, output):
306 """convert pyerr part of a code cell
304 """convert pyerr part of a code cell
307
305
308 Returns list."""
306 Returns list."""
309 raise NotImplementedError
307 raise NotImplementedError
310
308
311 def _unknown_lines(self, data):
309 def _unknown_lines(self, data):
312 """Return list of lines for an unknown cell.
310 """Return list of lines for an unknown cell.
313
311
314 Parameters
312 Parameters
315 ----------
313 ----------
316 data : str
314 data : str
317 The content of the unknown data as a single string.
315 The content of the unknown data as a single string.
318 """
316 """
319 raise NotImplementedError
317 raise NotImplementedError
320
318
321 # These are the possible format types in an output node
319 # These are the possible format types in an output node
322
320
323 def render_display_format_text(self, output):
321 def render_display_format_text(self, output):
324 """render the text part of an output
322 """render the text part of an output
325
323
326 Returns list.
324 Returns list.
327 """
325 """
328 raise NotImplementedError
326 raise NotImplementedError
329
327
330 def render_display_format_html(self, output):
328 def render_display_format_html(self, output):
331 """render the html part of an output
329 """render the html part of an output
332
330
333 Returns list.
331 Returns list.
334 """
332 """
335 raise NotImplementedError
333 raise NotImplementedError
336
334
337 def render_display_format_latex(self, output):
335 def render_display_format_latex(self, output):
338 """render the latex part of an output
336 """render the latex part of an output
339
337
340 Returns list.
338 Returns list.
341 """
339 """
342 raise NotImplementedError
340 raise NotImplementedError
343
341
344 def render_display_format_json(self, output):
342 def render_display_format_json(self, output):
345 """render the json part of an output
343 """render the json part of an output
346
344
347 Returns list.
345 Returns list.
348 """
346 """
349 raise NotImplementedError
347 raise NotImplementedError
350
348
351 def render_display_format_javascript(self, output):
349 def render_display_format_javascript(self, output):
352 """render the javascript part of an output
350 """render the javascript part of an output
353
351
354 Returns list.
352 Returns list.
355 """
353 """
356 raise NotImplementedError
354 raise NotImplementedError
357
355
General Comments 0
You need to be logged in to leave comments. Login now