##// END OF EJS Templates
Allow to build a converter without input file
Matthias BUSSONNIER -
Show More
@@ -1,430 +1,433 b''
1 1 """Base classes for the notebook conversion pipeline.
2 2
3 3 This module defines Converter, from which all objects designed to implement
4 4 a conversion of IPython notebooks to some other format should inherit.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (c) 2012, the IPython Development Team.
8 8 #
9 9 # Distributed under the terms of the Modified BSD License.
10 10 #
11 11 # The full license is in the file COPYING.txt, distributed with this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 from __future__ import print_function, absolute_import
19 19
20 20 # Stdlib imports
21 21 import codecs
22 22 import io
23 23 import logging
24 24 import os
25 25 import pprint
26 26 import re
27 27 from types import FunctionType
28 28
29 29 # IPython imports
30 30 from IPython.nbformat import current as nbformat
31 31 from IPython.config.configurable import Configurable, SingletonConfigurable
32 from IPython.utils.traitlets import List, Unicode, Type, Bool, Dict, CaselessStrEnum
32 from IPython.utils.traitlets import (List, Unicode, Type, Bool, Dict, CaselessStrEnum,
33 Any)
33 34
34 35 # Our own imports
35 36 from .utils import remove_fake_files_url
36 37
37 38
38 39 #-----------------------------------------------------------------------------
39 40 # Local utilities
40 41 #-----------------------------------------------------------------------------
41 42
def clean_filename(filename):
    """
    Sanitize a filename by replacing every unusual character.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        A copy of ``filename`` in which each character that is not an
        ASCII letter, a digit or an underscore has been replaced by ``_``.
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
59 60
60 61
61 62 #-----------------------------------------------------------------------------
62 63 # Class declarations
63 64 #-----------------------------------------------------------------------------
64 65
class ConversionException(Exception):
    """Exception type for notebook conversion errors.

    Nothing in this module raises it.
    """
67 68
68 69
class DocStringInheritor(type):
    """
    Metaclass that lets methods inherit docstrings from the base classes.

    For every plain function in the class body that has no docstring of
    its own, the bases are searched in order for an attribute of the same
    name.  The first non-empty docstring found is attached to the new
    function.  A docstring written on the derived method is never
    overwritten.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        for attributeName, attribute in classDict.items():
            if not isinstance(attribute, FunctionType):
                continue
            if attribute.__doc__:
                # The derived method documents itself; keep its docstring.
                continue
            # look through bases for matching attribute by name
            for baseclass in bases:
                if not hasattr(baseclass, attributeName):
                    continue
                basedoc = getattr(baseclass, attributeName).__doc__
                if basedoc:
                    attribute.__doc__ = basedoc
                    break
        # Hand type.__new__ a fresh dict so the caller's mapping is not reused.
        return type.__new__(meta, classname, bases, dict(classDict))
97 98
98 99
class Converter(Configurable):
    """Base class for converting an IPython notebook to another format.

    ``convert`` walks the cells of ``self.nb`` and hands each one to the
    ``render_<cell_type>`` method returned by ``dispatch``.  Most of the
    ``render_*`` hooks defined here raise NotImplementedError and are
    meant to be supplied by subclasses.
    """
    #__metaclass__ = DocStringInheritor
    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    # Declared as Any so that a converter can be built with infile=None.
    infile = Any()

    highlight_source = Bool(True,
                            config=True,
                            help="Enable syntax highlighting for code blocks.")

    preamble = Unicode("",
                       config=True,
                       help="Path to a user-specified preamble file")

    extract_figures = Bool(True,
                           config=True,
                           help="""extract base-64 encoded figures of the notebook into separate files,
                                replace by link to corresponding file in source.""")

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True, config=True)
    user_preamble = None
    raw_as_verbatim = False

    def __init__(self, infile=None, config=None, exclude=None, **kw):
        """Set up the converter, optionally bound to an input file.

        Parameters
        ----------
        infile : str, optional
            Path of the notebook to convert.  When it is empty or None the
            path-derived attributes keep their defaults and no ancillary
            directory is created.
        config : optional
            Passed to the Configurable constructor.
        exclude : list, optional
            Stored as ``self.exclude_cells``; defaults to a new empty list.
        **kw
            Accepted but not used by this constructor.
        """
        super(Converter, self).__init__(config=config)

        #DocStringInheritor.__init__(self=config)
        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.
        # A fresh list per instance: a shared [] default would leak state
        # between converters.
        self.exclude_cells = [] if exclude is None else exclude
        self.infile = infile
        if infile:
            self.infile_dir, infile_root = os.path.split(infile)
            self.infile_root = os.path.splitext(infile_root)[0]
            self.clean_name = clean_filename(self.infile_root)
            # Handle the creation of a directory for ancillary files, for
            # formats that need one.
            files_dir = os.path.join(self.infile_dir,
                                     self.clean_name + '_files')
            if not os.path.isdir(files_dir):
                os.mkdir(files_dir)
            self.files_dir = files_dir
            self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        """Remove the ancillary files directory if it exists and is empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or ``blank_symbol`` if it has none."""
        if hasattr(cell, 'prompt_number'):
            return cell.prompt_number
        return self.blank_symbol

    def dispatch(self, cell_type):
        """Return the cell_type dependent render method, e.g. render_code.

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """Return the format dependent render method.

        For example ``render_display_format_text``.  Falls back to
        ``render_unknown_display`` when no matching attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses
        of the Converter class do not need to re-implement this method, but
        just need implementations of the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\\n"

        Returns
        -------
        out : string
            Header, body and footer lines joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Render every cell of every worksheet in ``self.nb``.

        Returns the converted cells, joined with ``cell_separator``, as a
        list of lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        return cell_separator.join(converted_cells).split('\n')

    def render(self):
        """Read (if needed), convert, and save the notebook.

        ``read`` is only called when ``self.nb`` has not been set yet.
        Returns the value returned by ``save``.
        """
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to a file and return its absolute path.

        Parameters
        ----------
        outfile : str, optional
            Destination path.  Defaults to ``self.outbase`` followed by a
            dot and ``self.extension``.
        encoding : str, optional
            Output encoding.  Defaults to ``self.default_encoding``.
        """
        # NOTE(review): when the converter was built without an input file,
        # outbase is empty and the default outfile is just '.<extension>'
        # -- confirm that callers always pass outfile in that case.
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named from ``clean_name``, the running figure counter
        and ``fmt``, and is written inside ``self.files_dir``.

        Returns the path of the new file (``files_dir`` joined with the
        figure name).
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            figfile = open(fullname, 'wb')
        else:
            figfile = codecs.open(fullname, 'wb', self.default_encoding)

        with figfile as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The format is the first entry of ``display_data_priority`` present
        in ``output``; failing that, the first key of ``output`` other than
        'output_type'.

        Returns list.

        Raises RuntimeError if ``output`` holds no display data at all.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf'] and self.extract_figures:
            print('I will extract this', fmt)
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            print('I will NOT extract this', fmt)
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        ``type`` is unused.  It is optional because render_display_data
        calls the dispatched function with the output only.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
General Comments 0
You need to be logged in to leave comments. Login now