##// END OF EJS Templates
Clearer/simpler code suggested by @fperez....
David Warde-Farley -
Show More
@@ -1,357 +1,355 b''
1 1 from __future__ import print_function, absolute_import
2 2 from converters.utils import remove_fake_files_url
3 3
4 4 # Stdlib
5 5 import codecs
6 6 import io
7 7 import logging
8 8 import os
9 9 import pprint
10 10 import re
11 11 from types import FunctionType
12 12
13 13 # From IPython
14 14 from IPython.nbformat import current as nbformat
15 15
16 16 # local
17 17
def clean_filename(filename):
    """
    Sanitize a filename by replacing unsafe characters with underscores.

    Every character that is not an ASCII letter, an ASCII digit or an
    underscore is replaced, one-for-one, by ``_``.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        The sanitized filename, the same length as the input, containing
        only ASCII alphanumeric characters and underscores.
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
35 35
36 36 #-----------------------------------------------------------------------------
37 37 # Class declarations
38 38 #-----------------------------------------------------------------------------
39 39
class ConversionException(Exception):
    """Error type for failures raised by the notebook converters."""
42 42
class DocStringInheritor(type):
    """
    Metaclass that lets undocumented methods inherit a base-class docstring.

    When a class is created, every plain function in its namespace that has
    no docstring of its own is looked up by name on the direct bases, in
    order.  The first base attribute found with a non-empty docstring donates
    that docstring to the new method.  Methods that already carry their own
    docstring are left untouched.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        """
        Build the class, filling in missing method docstrings from ``bases``.

        Parameters
        ----------
        meta : type
            The metaclass itself.
        classname : str
            Name of the class being created.
        bases : tuple of type
            Direct base classes of the class being created.
        classDict : dict
            Namespace of the class being created.

        Returns
        -------
        The newly created class.
        """
        for attributeName, attribute in classDict.items():
            # Only plain functions take part, and a method that documents
            # itself must not have its docstring clobbered by a base class.
            if not isinstance(attribute, FunctionType) or attribute.__doc__:
                continue
            # look through bases for matching function by name
            for baseclass in bases:
                if not hasattr(baseclass, attributeName):
                    continue
                basedoc = getattr(baseclass, attributeName).__doc__
                if basedoc:
                    attribute.__doc__ = basedoc
                    break
        # Hand type.__new__ a plain-dict copy of the namespace.
        return type.__new__(meta, classname, bases, dict(classDict))
71 71
class Converter(object):
    """
    Base class for notebook converters.

    A converter reads the notebook named by ``infile``, dispatches every
    cell to a ``render_<cell_type>`` method, joins the rendered lines and
    writes them to ``<outbase>.<extension>``.  Subclasses provide the
    ``render_*`` methods, ``_img_lines`` and ``_unknown_lines``; the versions
    defined here raise ``NotImplementedError``.
    """
    __metaclass__ = DocStringInheritor
    default_encoding = 'utf-8'
    extension = str()
    figures_counter = 0
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    with_preamble = True
    user_preamble = None
    output = unicode()
    raw_as_verbatim = False
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']

    def __init__(self, infile):
        """
        Record the input path and prepare the auxiliary files directory.

        Parameters
        ----------
        infile : str
            Path of the notebook file to convert.

        Notes
        -----
        Creates the directory ``<clean name>_files`` next to ``infile`` if
        it does not exist yet.
        """
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(infile_root)
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        """Remove the files directory again if nothing was written to it."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return output_type dependent render method, for example
        render_display_format_text

        Falls back to ``render_unknown_display`` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert the notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses
        of the Converter class do not need to re-implement this method, but
        just need implementations for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is a newline.

        Returns
        -------
        out : string
            Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """
        Render every cell of every worksheet in ``self.nb``.

        Parameters
        ----------
        cell_separator : string
            String placed between the rendered cells.

        Returns
        -------
        cell_lines : list
            The rendered cells joined with ``cell_separator`` and split back
            into individual lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    # Helper imported from converters.utils; presumably it
                    # rewrites "files/" URLs in the cell in place -- confirm
                    # against its definition.
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert(type(self.output) == unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """
        Write ``self.output`` to a file.

        Parameters
        ----------
        outfile : str, optional
            Destination path.  Defaults to ``self.outbase`` plus ``'.'`` plus
            ``self.extension``.
        encoding : str, optional
            Text encoding of the file.  Defaults to ``self.default_encoding``.

        Returns
        -------
        path : str
            Absolute path of the file that was written.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<clean_name>_fig_<NN>.<fmt>`` using the running
        ``figures_counter``, which is incremented on every call.

        Returns the path of the written file, located in ``self.files_dir``.
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # Decode before opening so that bad data leaves no empty file.
            data = data.decode('base64')
            figfile = open(fullname, 'wb')
        else:
            figfile = codecs.open(fullname, 'wb', self.default_encoding)

        with figfile as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The format is the first entry of ``display_data_priority`` present
        in ``output``; failing that, the first key of ``output`` other than
        ``'output_type'``.

        Returns list.

        Raises RuntimeError if ``output`` holds no display data at all.
        """
        # Choose preferred format if available
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            # Preference can't be satisfied: take whatever comes first
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf']:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and renders the pretty-printed cell through
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        ``type`` is unused.  It is optional because this method is the
        fallback of ``dispatch_display_format``, whose result is called with
        the output as its only argument.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
General Comments 0
You need to be logged in to leave comments. Login now