##// END OF EJS Templates
nbconvert is now configurable app
Matthias BUSSONNIER -
Show More
@@ -1,411 +1,411 b''
1 1 """Base classes for the notebook conversion pipeline.
2 2
3 3 This module defines Converter, from which all objects designed to implement
4 4 a conversion of IPython notebooks to some other format should inherit.
5 5 """
6 6 #-----------------------------------------------------------------------------
7 7 # Copyright (c) 2012, the IPython Development Team.
8 8 #
9 9 # Distributed under the terms of the Modified BSD License.
10 10 #
11 11 # The full license is in the file COPYING.txt, distributed with this software.
12 12 #-----------------------------------------------------------------------------
13 13
14 14 #-----------------------------------------------------------------------------
15 15 # Imports
16 16 #-----------------------------------------------------------------------------
17 17
18 18 from __future__ import print_function, absolute_import
19 19
20 20 # Stdlib imports
21 21 import codecs
22 22 import io
23 23 import logging
24 24 import os
25 25 import pprint
26 26 import re
27 27 from types import FunctionType
28 28
29 29 # IPython imports
30 30 from IPython.nbformat import current as nbformat
31 31
32 32 # Our own imports
33 33 from .utils import remove_fake_files_url
34 34
35 35
36 36 #-----------------------------------------------------------------------------
37 37 # Local utilities
38 38 #-----------------------------------------------------------------------------
39 39
def clean_filename(filename):
    """
    Replace every character that is not a letter, digit or underscore.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        The input with each character outside ``[a-zA-Z0-9_]`` replaced by
        an underscore, so the length is unchanged.
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
57 57
58 58
59 59 #-----------------------------------------------------------------------------
60 60 # Class declarations
61 61 #-----------------------------------------------------------------------------
62 62
class ConversionException(Exception):
    """Error type specific to the notebook conversion pipeline."""
65 65
66 66
class DocStringInheritor(type):
    """
    Metaclass that copies docstrings from base-class methods.

    For every plain function defined in the class body, the direct bases are
    inspected in order.  The first base that has an attribute of the same
    name with a non-empty docstring donates that docstring to the new
    function, replacing whatever docstring the function already had.
    Functions with no documented counterpart are left untouched.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        namespace = {}
        for name, member in classDict.items():
            if isinstance(member, FunctionType):
                # Docstrings of same-named attributes on the bases, in
                # base order; evaluated lazily so the scan stops at the
                # first usable one.
                candidate_docs = (getattr(base, name).__doc__
                                  for base in bases if hasattr(base, name))
                inherited_doc = next(
                    (doc for doc in candidate_docs if doc), None)
                if inherited_doc:
                    member.__doc__ = inherited_doc
            namespace[name] = member
        return type.__new__(meta, classname, bases, namespace)
95 95
96 96
class Converter(object):
    """Base class for notebook converters.

    `convert` assembles the output by dispatching every cell to a
    ``render_<cell_type>`` method; most of those raise NotImplementedError
    here and are meant to be provided by subclasses.  `render` ties together
    `read`, `convert` and `save`.
    """
    __metaclass__ = DocStringInheritor
    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = str()
    highlight_source = True
    infile_dir = str()
    infile_root = str()
    clean_name = str()
    files_dir = str()
    outbase = str()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = True
    user_preamble = None
    raw_as_verbatim = False

    def __init__(self, infile, highlight_source=True, exclude=None, **kw):
        """Record the input path and prepare the ancillary files directory.

        Parameters
        ----------
        infile : str
            Path of the notebook file to convert.
        highlight_source : bool
            Stored on the instance as ``highlight_source``.
        exclude : list, optional
            Stored on the instance as ``exclude_cells``.  Defaults to a
            fresh empty list.
        **kw
            Extra keyword arguments are accepted and ignored by this base
            class.

        Creates the directory ``<clean_name>_files`` next to `infile` when
        it does not exist yet.
        """
        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.
        # A None default avoids sharing one list object between instances.
        self.exclude_cells = [] if exclude is None else exclude
        self.infile = infile
        self.highlight_source = highlight_source
        self.infile_dir, infile_root = os.path.split(infile)
        self.infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(self.infile_root)
        # Handle the creation of a directory for ancillary files, for
        # formats that need one.
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        """Remove the ancillary files directory if it is still empty."""
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return ``cell.prompt_number``, or `blank_symbol` if it has none."""
        return cell.prompt_number if hasattr(cell, 'prompt_number') \
            else self.blank_symbol

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to `render_unknown` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        return output_type dependent render method, for example
        render_display_format_text

        Falls back to `render_unknown_display` when no
        ``render_display_format_<format>`` attribute exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementation for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\n"

        Returns
        -------
        out : string
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Render every cell of every worksheet of ``self.nb``.

        Returns the rendered cells, joined with `cell_separator`, as a list
        of lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute path.

        Parameters
        ----------
        outfile : str, optional
            Destination path.  Defaults to ``outbase + '.' + extension``.
        encoding : str, optional
            Text encoding of the written file.  Defaults to
            `default_encoding`.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<clean_name>_fig_<NN>.<fmt>`` and written into
        ``self.files_dir``; `figures_counter` is incremented on every call.

        Returns the path of the written file, built from ``self.files_dir``.
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            # NOTE: the 'base64' string codec only exists on Python 2.
            data = data.decode('base64')
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The format is the first entry of `display_data_priority` present in
        `output`; failing that, the first key of `output` other than
        'output_type'.

        Returns list.

        Raises RuntimeError if `output` holds no display data at all.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf']:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        `type` is unused; it is optional because `render_display_data`
        calls the dispatched renderer with `output` as its only argument.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
@@ -1,87 +1,145 b''
1 1 #!/usr/bin/env python
2 2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3 3
4 4 Example:
5 5 ./nbconvert.py --format rst file.ipynb
6 6
7 7 Produces 'file.rst', along with auto-generated figure files
8 8 called nb_figure_NN.png.
9 9 """
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 from __future__ import print_function
14 14
15 15 # From IPython
16 16 from IPython.external import argparse
17 17
18 # All the stuff needed for the configurable things
19 from IPython.config.application import Application, catch_config_error
20 from IPython.config.configurable import Configurable
21 from IPython.config.loader import Config, ConfigFileNotFound
22 from IPython.utils.traitlets import List, Unicode, Type, Bool, Dict, CaselessStrEnum
23
24
18 25 # local
19 26 from converters.html import ConverterHTML
20 27 from converters.markdown import ConverterMarkdown
21 28 from converters.bloggerhtml import ConverterBloggerHTML
22 29 from converters.rst import ConverterRST
23 30 from converters.latex import ConverterLaTeX
24 31 from converters.python import ConverterPy
25 32
26 33
27 34 # When adding a new format, make sure to add it to the `converters`
28 35 # dictionary below. This is used to create the list of known formats,
29 36 # which gets printed in case an unknown format is encountered, as well
30 37 # as in the help
31 38
# Maps each supported format name to the converter class implementing it.
converters = {
    'rst': ConverterRST,
    'markdown': ConverterMarkdown,
    'html': ConverterHTML,
    'blogger-html': ConverterBloggerHTML,
    'latex': ConverterLaTeX,
    'py': ConverterPy,
}

default_format = 'rst'

# Build the comma-separated list of known formats, tagging the entry that
# equals `default_format` with " (default)".
_format_labels = [key + " (default)" if key == default_format else key
                  for key in converters]
known_formats = ', '.join(_format_labels)
46 53
class NbconvertApp(Application):
    """Configurable application that converts a notebook to another format.

    The conversion options are declared as configurable traits, and
    `aliases` maps short command-line names onto them.  `initialize` parses
    the command line and `run` performs the conversion.
    """

    name = Unicode('thisIsNbconvertApp', config=True)

    fmt = CaselessStrEnum(converters.keys(),
                          default_value=default_format,
                          config=True,
                          help="Supported conversion format")

    preamble = Unicode("",
                       config=True,
                       help="Path to a user-specified preamble file")

    highlight = Bool(True,
                     config=True,
                     help="Enable syntax highlighting for code blocks.")

    exclude = List([],
                   config=True,
                   help='list of cells to exclude while converting')

    infile = Unicode("", config=True)

    # NOTE: the conversion itself must not happen in the class body; names
    # such as the converter class only exist once `run` is called.

    aliases = {
        'format': 'NbconvertApp.fmt',
        'highlight': 'NbconvertApp.highlight',
        'preamble': 'NbconvertApp.preamble',
        'infile': 'NbconvertApp.infile',
    }

    def __init__(self, **kwargs):
        super(NbconvertApp, self).__init__(**kwargs)

    @catch_config_error
    def initialize(self, argv=None):
        """Parse the command line and apply the resulting configuration.

        Parameters
        ----------
        argv : list of str, optional
            Passed unchanged to `parse_command_line`.
        """
        # don't hook up crash handler before parsing command-line
        self.parse_command_line(argv)
        cl_config = self.config
        self.log.debug("Command-line config: %s", self.config)
        self.update_config(cl_config)
        # TODO: crash handler, sub-application hand-off, profile directory
        # and config-file loading are not wired up yet.  Once config files
        # are loaded, re-apply cl_config afterwards so that command-line
        # options override config-file options.

    def run(self):
        """Convert `infile` with the converter registered for `fmt`.

        Raises
        ------
        SystemExit
            If no input file was configured.
        """
        if not self.infile:
            # Fail before the converter creates directories for an empty
            # path.
            raise SystemExit("No input notebook given, "
                             "use --infile=<notebook>")
        ConverterClass = converters[self.fmt]

        converter = ConverterClass(self.infile,
                                   highlight_source=self.highlight,
                                   preamble=self.preamble,
                                   exclude=self.exclude)
        converter.render()
116
def main():
    """Run the nbconvert application.

    Obtains the NbconvertApp instance, lets it parse the command line, then
    starts it and performs the conversion.
    """
    app = NbconvertApp.instance()
    app.initialize()
    app.start()
    app.run()
60 124 #-----------------------------------------------------------------------------
61 125 # Script main
62 126 #-----------------------------------------------------------------------------
63 127
if __name__ == '__main__':
    # Command-line options are declared as configurable traits on
    # NbconvertApp and parsed in NbconvertApp.initialize(), which replaces
    # the former argparse-based parser.
    # TODO: the former -e/--exclude option has no alias in
    # NbconvertApp.aliases yet.
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    main()
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (520 lines changed) Show them Hide them
1 NO CONTENT: file was removed
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (701 lines changed) Show them Hide them
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (850 lines changed) Show them Hide them
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (1439 lines changed) Show them Hide them
General Comments 0
You need to be logged in to leave comments. Login now