##// END OF EJS Templates
Write output to current directory, instead of to source directory.
Stefan van der Walt -
Show More
@@ -1,705 +1,706 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3
3
4 Example:
4 Example:
5 ./nbconvert.py --format html file.ipynb
5 ./nbconvert.py --format html file.ipynb
6
6
7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
9 use '--format quick-html' which will do ipynb -> html, but won't look as
9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 pretty.
10 pretty.
11 """
11 """
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # Imports
13 # Imports
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 from __future__ import print_function
15 from __future__ import print_function
16
16
17 # Stdlib
17 # Stdlib
18 import codecs
18 import codecs
19 import logging
19 import logging
20 import os
20 import os
21 import pprint
21 import pprint
22 import re
22 import re
23 import subprocess
23 import subprocess
24 import sys
24 import sys
25
25
26 # From IPython
26 # From IPython
27 from IPython.external import argparse
27 from IPython.external import argparse
28 from IPython.nbformat import current as nbformat
28 from IPython.nbformat import current as nbformat
29 from IPython.utils.text import indent
29 from IPython.utils.text import indent
30 from decorators import DocInherit
30 from decorators import DocInherit
31
31
32 #-----------------------------------------------------------------------------
32 #-----------------------------------------------------------------------------
33 # Utility functions
33 # Utility functions
34 #-----------------------------------------------------------------------------
34 #-----------------------------------------------------------------------------
35
35
def remove_fake_files_url(cell):
    """Strip the '/files/' pseudo-path from a cell's source, in place.

    Every occurrence of the literal substring '/files/' in ``cell.source``
    is deleted and the result is stored back on ``cell.source``.  Nothing is
    returned.
    """
    cell.source = cell.source.replace('/files/', '')
41
41
42
42
def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    A color (SGR) sequence is ``ESC [`` followed by zero or more
    semicolon-separated numeric parameters and a final ``m``.  Any number of
    parameters is accepted, so ``ESC[0m``, ``ESC[1;31m``, ``ESC[01;32m``,
    ``ESC[0;32;40m`` and the bare ``ESC[m`` are all removed.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        ``src`` with every color sequence deleted; all other text, including
        a literal ``[0m`` that is not preceded by ESC, is left untouched.
    """
    return re.sub(r'\033\[[\d;]*m', '', src)
55
55
56
56
57 # Pandoc-dependent code
57 # Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error (``OSError`` from ``subprocess.Popen``)
    if the ``pandoc`` executable cannot be started.

    pandoc's stderr is not captured: the child inherits this process's
    stderr, so any error messages it produces appear there directly.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.  Text is sent to pandoc
      encoded as UTF-8; a byte string is passed through unchanged.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # The pipe carries bytes; encode explicitly instead of relying on an
    # implicit ASCII conversion that fails on non-ASCII markdown.
    if not isinstance(src, bytes):
        src = src.encode('utf-8')
    # Only stdout is piped, so the second element of the result is always
    # None and is ignored.
    out = p.communicate(src)[0]
    return out.decode('utf-8')
82
82
83
83
def rst_directive(directive, text=''):
    """Return the list of lines making up an RST directive.

    The result always starts with ``directive`` followed by an empty
    string.  When ``text`` is non-empty, the indented text and another empty
    string are appended.
    """
    lines = [directive, '']
    if not text:
        return lines
    return lines + [indent(text), '']
89
89
90 #-----------------------------------------------------------------------------
90 #-----------------------------------------------------------------------------
91 # Class declarations
91 # Class declarations
92 #-----------------------------------------------------------------------------
92 #-----------------------------------------------------------------------------
93
93
class ConversionException(Exception):
    """Exception type declared by this module; adds nothing to Exception.

    NOTE(review): nothing in the visible part of the file raises it.
    """
96
96
97
97
class Converter(object):
    """Base class for converting a notebook file to another text format.

    The methods here read the notebook, walk every cell of every worksheet,
    dispatch each cell to a ``render_<cell_type>`` method and write the
    joined result to disk.  Subclasses set ``extension`` and implement the
    ``render_*``, ``_img_lines`` and ``_unknown_lines`` methods that raise
    ``NotImplementedError`` here.
    """
    # Encoding used for the output file and for SVG figure files.
    default_encoding = 'utf-8'
    # Extension of the output file, without the leading dot.
    extension = str()
    # Index of the next figure file created by _new_figure.
    figures_counter = 0
    # Input notebook path, its directory, and the path minus its extension.
    infile = str()
    infile_dir = str()
    infile_root = str()
    # Directory that receives extracted figure files.
    files_dir = str()
    # Preamble switches read by ConverterLaTeX.convert.
    with_preamble = True
    user_preamble = None
    # Converted document text, filled in by render().
    output = str()
    # When true, subclasses render raw cells as verbatim/literal blocks.
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create the figures directory if needed.

        The figures directory is ``<infile without extension>_files``.
        """
        self.infile = infile
        self.infile_dir = os.path.dirname(infile)
        infile_root = os.path.splitext(infile)[0]
        files_dir = infile_root + '_files'
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def convert(self):
        """Render every cell of ``self.nb`` and return the joined text.

        The optional header comes first and the optional footer last; an
        empty line is added after each rendered cell.
        """
        lines = []
        lines.extend(self.optional_header())
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                lines.extend(conv_fn(cell))
                lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute output path.

        Parameters
        ----------
        infile : string, optional
          Path of the file to write (the name is historical).  When omitted,
          the output goes to the current working directory, named after the
          input notebook with ``self.extension`` as its extension.
        encoding : string, optional
          Encoding for the written text; defaults to
          ``self.default_encoding``.
        """
        if infile is None:
            outfile = os.path.basename(self.infile)
            outfile = os.path.splitext(outfile)[0] + '.' + self.extension
        else:
            # Honour an explicit destination; without this branch `outfile`
            # would be unbound and open() would raise NameError.
            outfile = infile
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded, so the file is opened in binary
        # mode.
        with open(outfile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return os.path.abspath(outfile)

    def optional_header(self):
        """Return the lines placed before the first cell (none here)."""
        return []

    def optional_footer(self):
        """Return the lines placed after the last cell (none here)."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<notebook name>_fig_NN.<fmt>`` and is written
        into ``self.files_dir``; ``self.figures_counter`` is incremented.

        Returns the path of the new file (``self.files_dir`` joined with the
        figure name).
        """
        import base64

        # Use only the notebook's base name: joining files_dir with the full
        # infile_root would duplicate its directory part, and an absolute
        # infile_root would make os.path.join drop files_dir entirely.
        figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Each of the png, svg, jpg and pdf entries present in ``output`` is
        written to its own figure file and rendered by ``_<fmt>_lines`` when
        the converter defines it, otherwise by ``_img_lines``.

        Returns list.
        """
        lines = []

        for fmt in ['png', 'svg', 'jpg', 'pdf']:
            if fmt in output:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))

        return lines

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        The pretty-printed cell is logged as a warning and handed to
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell:\n%s', data)
        return self._unknown_lines(data)

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError
274
275
275
276
class ConverterRST(Converter):
    """Converter producing reStructuredText output (``.rst``)."""
    extension = 'rst'
    # Underline character for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # Title followed by an underline of the same length.
        underline_char = self.heading_level[cell.level]
        title = cell.source
        return ['{0}\n{1}\n'.format(title, underline_char * len(title))]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce nothing, even if they have outputs.
        if not cell.input:
            return []

        rendered = ['In[%s]:' % cell.prompt_number, '']
        rendered += rst_directive('.. code:: python', cell.input)

        for out in cell.outputs:
            rendered += self.dispatch(out.output_type)(out)

        return rendered

    @DocInherit
    def render_markdown(self, cell):
        # Markdown source is passed through unchanged.
        return [cell.source]

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        # Literal block: '::' marker, blank line, indented text.
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        rendered = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            rendered += rst_directive('.. math::', output.latex)

        if 'text' in output:
            rendered += rst_directive('.. parsed-literal::', output.text)

        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return ['::', '', indent(traceback_text), '']

    @DocInherit
    def _img_lines(self, img_file):
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        if 'text' not in output:
            return []
        return list(rst_directive('.. parsed-literal::', output.text))

    @DocInherit
    def _unknown_lines(self, data):
        warning = rst_directive('.. warning:: Unknown cell')
        return warning + [data]
344
345
345
346
class ConverterQuickHTML(Converter):
    """Converter producing a simple HTML page directly from the notebook.

    Notebook text is escaped before being placed in the markup.  The one
    exception is a raw cell rendered with ``raw_as_verbatim`` off, which is
    emitted as-is.
    """
    extension = 'html'

    def _escape(self, text):
        """Return ``text`` with ``&``, ``<`` and ``>`` replaced by entities."""
        from xml.sax.saxutils import escape
        return escape(text)

    def in_tag(self, tag, src):
        """Return a list of elements bracketed by the given tag"""
        return ['<%s>' % tag, src, '</%s>' % tag]

    def optional_header(self):
        """Return the lines that open the HTML document."""
        # XXX: inject the IPython standard CSS into here
        s = """<html>
<head>
</head>

<body>
"""
        return s.splitlines()

    def optional_footer(self):
        """Return the lines that close the HTML document."""
        s = """</body>
</html>
"""
        return s.splitlines()

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(self._escape(cell.source),
                                               marker)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # One table per cell: prompt and input in the first row, then one
        # row per output with an empty prompt column.
        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>'
                     % cell.prompt_number)
        lines.append("<br>\n".join(self._escape(line)
                                   for line in cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return self.in_tag('pre', self._escape(cell.source))

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_tag('pre', self._escape(cell.source))
        else:
            # Raw cells are emitted as-is so they can carry literal HTML.
            return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        # render_code has already opened the enclosing <td>, so only inline
        # content is emitted here; a nested <tr> or a dangling <td> would
        # produce invalid markup.
        lines = ['<tt>Out[<b>%s</b>]:</tt>' % output.prompt_number]

        # output is a dictionary like object with type as a key
        for out_type in ('text', 'latex'):
            if out_type in output:
                lines.extend(self.in_tag(
                    'pre', self._escape(indent(output[out_type]))))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.in_tag(
            'pre', self._escape(remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append(self._escape(output.text))

        return lines

    @DocInherit
    def _unknown_lines(self, data):
        return (['<h2>Warning:: Unknown cell</h2>'] +
                self.in_tag('pre', self._escape(data)))
437
438
438
439
439 class ConverterLaTeX(Converter):
440 class ConverterLaTeX(Converter):
440 """Converts a notebook to a .tex file suitable for pdflatex.
441 """Converts a notebook to a .tex file suitable for pdflatex.
441
442
442 Note: this converter *needs*:
443 Note: this converter *needs*:
443
444
444 - `pandoc`: for all conversion of markdown cells. If your notebook only
445 - `pandoc`: for all conversion of markdown cells. If your notebook only
445 has Raw cells, pandoc will not be needed.
446 has Raw cells, pandoc will not be needed.
446
447
447 - `inkscape`: if your notebook has SVG figures. These need to be
448 - `inkscape`: if your notebook has SVG figures. These need to be
448 converted to PDF before inclusion in the TeX file, as LaTeX doesn't
449 converted to PDF before inclusion in the TeX file, as LaTeX doesn't
449 understand SVG natively.
450 understand SVG natively.
450
451
451 You will in general obtain much better final PDF results if you configure
452 You will in general obtain much better final PDF results if you configure
452 the matplotlib backend to create SVG output with
453 the matplotlib backend to create SVG output with
453
454
454 %config InlineBackend.figure_format = 'svg'
455 %config InlineBackend.figure_format = 'svg'
455
456
456 (or set the equivalent flag at startup or in your configuration profile).
457 (or set the equivalent flag at startup or in your configuration profile).
457 """
458 """
458 extension = 'tex'
459 extension = 'tex'
459 documentclass = 'article'
460 documentclass = 'article'
460 documentclass_options = '11pt,english'
461 documentclass_options = '11pt,english'
461 heading_map = {1: r'\section',
462 heading_map = {1: r'\section',
462 2: r'\subsection',
463 2: r'\subsection',
463 3: r'\subsubsection',
464 3: r'\subsubsection',
464 4: r'\paragraph',
465 4: r'\paragraph',
465 5: r'\subparagraph',
466 5: r'\subparagraph',
466 6: r'\subparagraph'}
467 6: r'\subparagraph'}
467
468
468 def in_env(self, environment, lines):
469 def in_env(self, environment, lines):
469 """Return list of environment lines for input lines
470 """Return list of environment lines for input lines
470
471
471 Parameters
472 Parameters
472 ----------
473 ----------
473 env : string
474 env : string
474 Name of the environment to bracket with begin/end.
475 Name of the environment to bracket with begin/end.
475
476
476 lines: """
477 lines: """
477 out = [r'\begin{%s}' % environment]
478 out = [r'\begin{%s}' % environment]
478 if isinstance(lines, basestring):
479 if isinstance(lines, basestring):
479 out.append(lines)
480 out.append(lines)
480 else: # list
481 else: # list
481 out.extend(lines)
482 out.extend(lines)
482 out.append(r'\end{%s}' % environment)
483 out.append(r'\end{%s}' % environment)
483 return out
484 return out
484
485
485 def convert(self):
486 def convert(self):
486 # The main body is done by the logic in the parent class, and that's
487 # The main body is done by the logic in the parent class, and that's
487 # all we need if preamble support has been turned off.
488 # all we need if preamble support has been turned off.
488 body = super(ConverterLaTeX, self).convert()
489 body = super(ConverterLaTeX, self).convert()
489 if not self.with_preamble:
490 if not self.with_preamble:
490 return body
491 return body
491 # But if preamble is on, then we need to construct a proper, standalone
492 # But if preamble is on, then we need to construct a proper, standalone
492 # tex file.
493 # tex file.
493
494
494 # Tag the document at the top and set latex class
495 # Tag the document at the top and set latex class
495 final = [ r'%% This file was auto-generated by IPython, do NOT edit',
496 final = [ r'%% This file was auto-generated by IPython, do NOT edit',
496 r'%% Conversion from the original notebook file:',
497 r'%% Conversion from the original notebook file:',
497 r'%% {0}'.format(self.infile),
498 r'%% {0}'.format(self.infile),
498 r'%%',
499 r'%%',
499 r'\documentclass[%s]{%s}' % (self.documentclass_options,
500 r'\documentclass[%s]{%s}' % (self.documentclass_options,
500 self.documentclass),
501 self.documentclass),
501 '',
502 '',
502 ]
503 ]
503 # Load our own preamble, which is stored next to the main file. We
504 # Load our own preamble, which is stored next to the main file. We
504 # need to be careful in case the script entry point is a symlink
505 # need to be careful in case the script entry point is a symlink
505 myfile = __file__ if not os.path.islink(__file__) else \
506 myfile = __file__ if not os.path.islink(__file__) else \
506 os.readlink(__file__)
507 os.readlink(__file__)
507 with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
508 with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
508 final.append(f.read())
509 final.append(f.read())
509
510
510 # Load any additional user-supplied preamble
511 # Load any additional user-supplied preamble
511 if self.user_preamble:
512 if self.user_preamble:
512 final.extend(['', '%% Adding user preamble from file:',
513 final.extend(['', '%% Adding user preamble from file:',
513 '%% {0}'.format(self.user_preamble), ''])
514 '%% {0}'.format(self.user_preamble), ''])
514 with open(self.user_preamble) as f:
515 with open(self.user_preamble) as f:
515 final.append(f.read())
516 final.append(f.read())
516
517
517 # Include document body
518 # Include document body
518 final.extend([ r'\begin{document}', '',
519 final.extend([ r'\begin{document}', '',
519 body,
520 body,
520 r'\end{document}', ''])
521 r'\end{document}', ''])
521 # Retun value must be a string
522 # Retun value must be a string
522 return '\n'.join(final)
523 return '\n'.join(final)
523
524
524 @DocInherit
525 @DocInherit
525 def render_heading(self, cell):
526 def render_heading(self, cell):
526 marker = self.heading_map[cell.level]
527 marker = self.heading_map[cell.level]
527 return ['%s{%s}' % (marker, cell.source) ]
528 return ['%s{%s}' % (marker, cell.source) ]
528
529
529 @DocInherit
530 @DocInherit
530 def render_code(self, cell):
531 def render_code(self, cell):
531 if not cell.input:
532 if not cell.input:
532 return []
533 return []
533
534
534 # Cell codes first carry input code, we use lstlisting for that
535 # Cell codes first carry input code, we use lstlisting for that
535 lines = [r'\begin{codecell}']
536 lines = [r'\begin{codecell}']
536
537
537 lines.extend(self.in_env('codeinput',
538 lines.extend(self.in_env('codeinput',
538 self.in_env('lstlisting', cell.input)))
539 self.in_env('lstlisting', cell.input)))
539
540
540 outlines = []
541 outlines = []
541 for output in cell.outputs:
542 for output in cell.outputs:
542 conv_fn = self.dispatch(output.output_type)
543 conv_fn = self.dispatch(output.output_type)
543 outlines.extend(conv_fn(output))
544 outlines.extend(conv_fn(output))
544
545
545 # And then output of many possible types; use a frame for all of it.
546 # And then output of many possible types; use a frame for all of it.
546 if outlines:
547 if outlines:
547 lines.extend(self.in_env('codeoutput', outlines))
548 lines.extend(self.in_env('codeoutput', outlines))
548
549
549 lines.append(r'\end{codecell}')
550 lines.append(r'\end{codecell}')
550
551
551 return lines
552 return lines
552
553
553
554
554 @DocInherit
555 @DocInherit
555 def _img_lines(self, img_file):
556 def _img_lines(self, img_file):
556 return self.in_env('center',
557 return self.in_env('center',
557 [r'\includegraphics[width=3in]{%s}' % img_file, r'\par'])
558 [r'\includegraphics[width=3in]{%s}' % img_file, r'\par'])
558
559
559 def _svg_lines(self, img_file):
560 def _svg_lines(self, img_file):
560 base_file = os.path.splitext(img_file)[0]
561 base_file = os.path.splitext(img_file)[0]
561 pdf_file = base_file + '.pdf'
562 pdf_file = base_file + '.pdf'
562 subprocess.check_call(['inkscape', '--export-pdf=%s' % pdf_file,
563 subprocess.check_call(['inkscape', '--export-pdf=%s' % pdf_file,
563 img_file])
564 img_file])
564 return self._img_lines(pdf_file)
565 return self._img_lines(pdf_file)
565
566
566 @DocInherit
567 @DocInherit
567 def render_stream(self, output):
568 def render_stream(self, output):
568 lines = []
569 lines = []
569
570
570 if 'text' in output:
571 if 'text' in output:
571 lines.extend(self.in_env('verbatim', output.text.strip()))
572 lines.extend(self.in_env('verbatim', output.text.strip()))
572
573
573 return lines
574 return lines
574
575
@DocInherit
def render_markdown(self, cell):
    # The whole cell source is converted in one call and returned as a
    # single-element list.
    latex_source = markdown2latex(cell.source)
    return [latex_source]
578
579
@DocInherit
def render_pyout(self, output):
    lines = []

    # output is a dictionary-like object keyed by representation type.
    if 'latex' in output:
        latex = output.latex
        if isinstance(latex, (list, tuple)):
            # Already a sequence of lines: add them one by one.
            lines.extend(latex)
        else:
            # A string payload must be added whole: list.extend() on a
            # string would split it into single characters, each of which
            # would end up on its own output line.
            lines.append(latex)

    # The plain-text representation, when present, is emitted inside a
    # verbatim environment after any LaTeX representation.
    if 'text' in output:
        lines.extend(self.in_env('verbatim', output.text))

    return lines
591
592
@DocInherit
def render_pyerr(self, output):
    # Note: a traceback is a *list* of frames; join them into one text
    # block and pass it through remove_ansi before typesetting.
    tb_text = remove_ansi('\n'.join(output.traceback))

    # Nest the verbatim text inside a 'traceback' environment.
    verbatim_block = self.in_env('verbatim', tb_text)
    return self.in_env('traceback', verbatim_block)
598
599
@DocInherit
def render_raw(self, cell):
    # Without the raw_as_verbatim flag the cell source is passed through
    # untouched as a single-element list.
    if not self.raw_as_verbatim:
        return [cell.source]

    # Otherwise wrap the source in a verbatim environment.
    return self.in_env('verbatim', cell.source)
605
606
@DocInherit
def _unknown_lines(self, data):
    # A bold warning header, followed by the unrecognised data wrapped in
    # a verbatim environment.
    warning = [r'{\vspace{5mm}\bf WARNING:: unknown cell:}']
    return warning + self.in_env('verbatim', data)
610
611
611 #-----------------------------------------------------------------------------
612 #-----------------------------------------------------------------------------
612 # Standalone conversion functions
613 # Standalone conversion functions
613 #-----------------------------------------------------------------------------
614 #-----------------------------------------------------------------------------
614
615
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : str
        Path of the reST file to convert.

    Returns
    -------
    newfname : str
        Path of the html file written: `infile` with its extension replaced
        by '.html'.

    Raises
    ------
    IOError
        If rst2html cannot be started, or if it writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a filename
    # containing spaces or shell metacharacters is passed to rst2html
    # verbatim instead of being re-parsed (or executed) by the shell.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as err:
        # Without a shell a missing executable surfaces as OSError; report
        # it as IOError, the error type this function uses for failures.
        raise IOError("could not run rst2html: %s" % err)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag.  This may only work if there's a real title defined
    # so we get a 'div class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        # The walker resumes right after the opening body line; copy
        # everything up to (not including) the closing body line.
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
662
663
# Human-readable list of the output formats accepted by main(); it is shown
# in the command-line help and in the unknown-format error message.
known_formats = ", ".join(["rst (default)", "html", "quick-html", "latex"])
664
665
def main(infile, format='rst'):
    """Convert a notebook to html in one step"""
    # XXX: this is just quick and dirty for now.  When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all below, as well as in the help.
    if format not in ('rst', 'html', 'quick-html', 'latex'):
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)

    if format == 'latex':
        ConverterLaTeX(infile).render()
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    else:
        # Both 'rst' and 'html' start by rendering the notebook to reST.
        rst_path = ConverterRST(infile).render()
        if format == 'html':
            # Currently, conversion to html is a 2 step process,
            # nb -> rst -> html.
            rst2simplehtml(rst_path)
687
688
688 #-----------------------------------------------------------------------------
689 #-----------------------------------------------------------------------------
689 # Script main
690 # Script main
690 #-----------------------------------------------------------------------------
691 #-----------------------------------------------------------------------------
691
692
if __name__ == '__main__':
    # Command-line entry point: one notebook filename plus an optional
    # output format, both forwarded to main().
    format_help = 'Output format. Supported formats: \n' + known_formats

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    parser.add_argument('-f', '--format', default='rst', help=format_help)

    options = parser.parse_args()
    # nargs=1 stores the filename in a one-element list.
    main(infile=options.infile[0], format=options.format)
General Comments 0
You need to be logged in to leave comments. Login now