##// END OF EJS Templates
missing comma
Matthias BUSSONNIER -
Show More
@@ -1,432 +1,432 b''
1 1 """Base classes for the notebook conversion pipeline.
2 2
3 3 This module defines Converter, from which all objects designed to implement
4 4 a conversion of IPython notebooks to some other format should inherit.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (c) 2012, the IPython Development Team.
8 8 #
9 9 # Distributed under the terms of the Modified BSD License.
10 10 #
11 11 # The full license is in the file COPYING.txt, distributed with this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 from __future__ import print_function, absolute_import
19 19
20 20 # Stdlib imports
21 21 import codecs
22 22 import io
23 23 import logging
24 24 import os
25 25 import pprint
26 26 import re
27 27 from types import FunctionType
28 28
29 29 # IPython imports
30 30 from IPython.nbformat import current as nbformat
31 31 from IPython.config.configurable import Configurable, SingletonConfigurable
32 32 from IPython.utils.traitlets import (List, Unicode, Type, Bool, Dict, CaselessStrEnum,
33 33 Any)
34 34
35 35 # Our own imports
36 36 from .utils import remove_fake_files_url
37 37
38 38
39 39 #-----------------------------------------------------------------------------
40 40 # Local utilities
41 41 #-----------------------------------------------------------------------------
42 42
def clean_filename(filename):
    """
    Sanitize a filename so it only holds ASCII letters, digits and '_'.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        A copy of `filename` in which every character outside
        ``a-z``, ``A-Z``, ``0-9`` and ``_`` is replaced by an underscore.
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
60 60
61 61
62 62 #-----------------------------------------------------------------------------
63 63 # Class declarations
64 64 #-----------------------------------------------------------------------------
65 65
class ConversionException(Exception):
    """Exception type declared for errors in the conversion pipeline."""
68 68
69 69
class DocStringInheritor(type):
    """
    Metaclass that copies docstrings from base-class methods.

    For every plain function in the class body, the direct bases are
    searched in order for an attribute of the same name.  The docstring of
    the first such attribute that has a non-empty docstring is attached to
    the derived class function.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        namespace = {}
        for name, member in classDict.items():
            namespace[name] = member
            if not isinstance(member, FunctionType):
                # Only plain functions receive an inherited docstring.
                continue
            # Look through the bases for a same-named, documented attribute.
            for base in bases:
                if not hasattr(base, name):
                    continue
                inherited_doc = getattr(base, name).__doc__
                if inherited_doc:
                    member.__doc__ = inherited_doc
                    break
        return type.__new__(meta, classname, bases, namespace)
98 98
99 99
class Converter(Configurable):
    """Base class for objects converting a notebook to another format.

    The generic pipeline (`render` -> `read` / `convert` / `save`) lives
    here.  Subclasses supply the ``render_*`` methods that `dispatch` and
    `dispatch_display_format` look up by name, as well as `_img_lines` and
    `_unknown_lines`; the versions defined here raise NotImplementedError.
    """
    #__metaclass__ = DocStringInheritor
    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = Any()

    highlight_source = Bool(True,
                            config=True,
                            help="Enable syntax highlighting for code blocks.")

    preamble = Unicode("",
                       config=True,
                       help="Path to a user-specified preamble file")

    extract_figures = Bool(True,
                           config=True,
                           help="""extract base-64 encoded figures of the notebook into separate files,
                           replace by link to corresponding file in source.""")

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True, config=True)
    user_preamble = None
    raw_as_verbatim = False

    def __init__(self, infile=None, config=None, exclude=None, **kw):
        """Set up the converter and, if `infile` is given, its output paths.

        Parameters
        ----------
        infile : str, optional
            Path of the notebook to convert.  When given, the path-derived
            attributes are filled in and the ``<clean_name>_files``
            directory is created next to the input file if missing.
        config : optional
            Passed to ``Configurable.__init__`` as ``config``.
        exclude : list, optional
            Stored as ``self.exclude_cells``.  Defaults to a fresh empty
            list for each instance.
        **kw
            Accepted but not forwarded to the parent constructor.
        """
        super(Converter, self).__init__(config=config)

        #DocStringInheritor.__init__(self=config)
        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.

        # A literal ``[]`` default would be one list shared by every
        # instance created without `exclude`; build a new one instead.
        self.exclude_cells = [] if exclude is None else exclude
        self.infile = infile
        if infile:
            self.infile_dir, infile_root = os.path.split(infile)
            self.infile_root = os.path.splitext(infile_root)[0]
            self.clean_name = clean_filename(self.infile_root)
            # Handle the creation of a directory for ancillary files, for
            # formats that need one.
            files_dir = os.path.join(self.infile_dir,
                                     self.clean_name + '_files')
            if not os.path.isdir(files_dir):
                os.mkdir(files_dir)
            self.files_dir = files_dir
            self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        """Remove the ancillary files directory if it exists and is empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or `blank_symbol` if it has none."""
        return cell.prompt_number if hasattr(cell, 'prompt_number') \
            else self.blank_symbol

    def dispatch(self, cell_type):
        """Return the cell_type dependent render method, e.g. render_code.

        Falls back to `render_unknown` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        Return the output format dependent render method, for example
        render_display_format_text.

        Falls back to `render_unknown_display` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementations for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\\n"

        Returns
        -------
        out : string
            Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Convert every cell of every worksheet in ``self.nb``.

        Parameters
        ----------
        cell_separator : string
            String used to join the converted cells.

        Returns
        -------
        cell_lines : list
            The joined cell text split back into individual lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert type(self.output) == unicode
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to a file.

        Parameters
        ----------
        outfile : str, optional
            Destination path.  Defaults to ``outbase + '.' + extension``.
        encoding : str, optional
            Encoding of the written file.  Defaults to `default_encoding`.

        Returns
        -------
        path : str
            Absolute path of the file that was written.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is written into ``self.files_dir`` and named after
        ``self.clean_name`` and the running ``figures_counter``, which is
        incremented on each call.

        Returns the path of the written file (``files_dir`` joined with
        the figure name).
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The format is the first entry of `display_data_priority` present
        in `output`; failing that, the first key of `output` other than
        'output_type'.

        Returns list.

        Raises RuntimeError if `output` holds no display data at all.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf'] and self.extract_figures:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Logs a warning and delegates the pretty-printed cell to
        `_unknown_lines`.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        `type` is unused.  It is optional because `render_display_data`
        calls the dispatched renderer with `output` only.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
General Comments 0
You need to be logged in to leave comments. Login now