##// END OF EJS Templates
full inkscape path on OS X...
Matthias BUSSONNIER -
Show More
@@ -1,705 +1,711 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3
3
4 Example:
4 Example:
5 ./nbconvert.py --format html file.ipynb
5 ./nbconvert.py --format html file.ipynb
6
6
7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
7 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
8 called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
9 use '--format quick-html' which will do ipynb -> html, but won't look as
9 use '--format quick-html' which will do ipynb -> html, but won't look as
10 pretty.
10 pretty.
11 """
11 """
12 #-----------------------------------------------------------------------------
12 #-----------------------------------------------------------------------------
13 # Imports
13 # Imports
14 #-----------------------------------------------------------------------------
14 #-----------------------------------------------------------------------------
15 from __future__ import print_function
15 from __future__ import print_function
16
16
17 # Stdlib
17 # Stdlib
18 import codecs
18 import codecs
19 import logging
19 import logging
20 import os
20 import os
21 import pprint
21 import pprint
22 import re
22 import re
23 import subprocess
23 import subprocess
24 import sys
24 import sys
25
25
# Executable used to convert SVG figures to PDF.  By default the bare command
# name is used and resolved through PATH when the subprocess is started.  On
# OS X the binary lives inside the application bundle, so the full path is
# used instead; when that bundle is absent, `inkscape` is set to None.
inkscape = 'inkscape'
if sys.platform == 'darwin':
    inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    # The existence check only makes sense for the absolute bundle path: the
    # bare command name above is not a filesystem path and must not be tested
    # with os.path.exists (it would be disabled on every other platform).
    if not os.path.exists(inkscape):
        inkscape = None
31
26 # From IPython
32 # From IPython
27 from IPython.external import argparse
33 from IPython.external import argparse
28 from IPython.nbformat import current as nbformat
34 from IPython.nbformat import current as nbformat
29 from IPython.utils.text import indent
35 from IPython.utils.text import indent
30 from decorators import DocInherit
36 from decorators import DocInherit
31
37
32 #-----------------------------------------------------------------------------
38 #-----------------------------------------------------------------------------
33 # Utility functions
39 # Utility functions
34 #-----------------------------------------------------------------------------
40 #-----------------------------------------------------------------------------
35
41
def remove_fake_files_url(cell):
    """Strip the '/files/' pseudo-path from a cell's source, in place.

    Every occurrence of '/files/' in ``cell.source`` is deleted and the
    result is stored back on the cell.  Nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')
41
47
42
48
# ESC '[' followed by any number of ';'-separated numeric parameters and a
# final 'm'.  This covers reset codes ('\033[0m', '\033[m'), single
# attributes ('\033[31m'), zero-padded pairs ('\033[01;34m') and extended
# colors ('\033[38;5;208m'), not only the 'd;dd' form.
_ANSI_COLOR_RE = re.compile(r'\033\[[0-9;]*m')


def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        `src` with every color escape sequence removed; all other text is
        left untouched.
    """
    return _ANSI_COLOR_RE.sub('', src)
55
61
56
62
57 # Pandoc-dependent code
63 # Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    # stderr has to be piped explicitly: without it communicate() returns
    # None for the error stream and the check below could never trigger.
    proc = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate(src)
    if err:
        print(err, file=sys.stderr)
    return out
82
88
83
89
def rst_directive(directive, text=''):
    """Return the list of lines making up an RST directive.

    The directive line is always followed by an empty line.  When `text` is
    non-empty it is added, indented, followed by another empty line.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
89
95
90 #-----------------------------------------------------------------------------
96 #-----------------------------------------------------------------------------
91 # Class declarations
97 # Class declarations
92 #-----------------------------------------------------------------------------
98 #-----------------------------------------------------------------------------
93
99
class ConversionException(Exception):
    """Exception type of this module, distinct from the built-in ones."""
96
102
97
103
class Converter(object):
    """Base class for notebook converters.

    ``render()`` reads the notebook named by ``infile``, runs ``convert()``
    and saves the result next to the input file, using ``extension`` as the
    file suffix.  ``convert()`` walks every cell of every worksheet and hands
    it to the matching ``render_<cell_type>`` method (``render_unknown`` when
    there is none).

    Most ``render_*`` hooks, as well as ``_img_lines`` and ``_unknown_lines``,
    raise NotImplementedError here and are meant to be overridden by
    subclasses.
    """
    default_encoding = 'utf-8'
    extension = str()
    figures_counter = 0
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    with_preamble = True
    user_preamble = None
    output = str()
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create '<root>_files' if it is missing.

        Parameters
        ----------
        infile : string
          Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir = os.path.dirname(infile)
        infile_root = os.path.splitext(infile)[0]
        files_dir = infile_root + '_files'
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def convert(self):
        """Convert all cells of ``self.nb`` and return the result as a string.

        The lines of the optional header, of every rendered cell (each one
        followed by an empty line) and of the optional footer are joined
        with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                lines.extend(conv_fn(cell))
                lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write ``self.output`` to disk and return the name of the file.

        Parameters
        ----------
        infile : string, optional
          Name of the file to write (despite the parameter name, this is
          the output file).  Defaults to the input file name with its
          suffix replaced by ``self.extension``.
        encoding : string, optional
          Encoding of the written file, ``self.default_encoding`` if omitted.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded, so the file is opened in binary
        # mode: the bytes reach the disk exactly as produced by encode().
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Return the list of lines placed before the converted cells."""
        return []

    def optional_footer(self):
        """Return the list of lines placed after the converted cells."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Parameters
        ----------
        data : string
          Figure content: base64 text for 'png', 'jpg' and 'pdf', the
          document text itself for any other format (SVG).
        fmt : string
          Format of the figure, also used as the file suffix.

        Returns the path of the written file, located in ``self.files_dir``.
        """
        # Only the file-name part of infile_root may be used here: it can
        # carry a directory (or be absolute), and joining that onto
        # files_dir would place the figure outside of files_dir or in a
        # directory that does not exist.
        figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            import base64
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(data))
        else:
            with codecs.open(fullname, 'wb', self.default_encoding) as f:
                f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Each of the png, svg, jpg and pdf entries present in `output` is
        written to a figure file and turned into inclusion lines.

        Returns list.
        """
        lines = []

        for fmt in ['png', 'svg', 'jpg', 'pdf']:
            if fmt in output:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))

        return lines

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        The pretty-printed cell is logged as a warning and handed to
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell:\n%s', data)
        return self._unknown_lines(data)

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError
274
280
275
281
class ConverterRST(Converter):
    """Converter writing reStructuredText ('.rst') output."""
    extension = 'rst'
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # Title followed by an underline of the same length.
        underline = self.heading_level[cell.level] * len(cell.source)
        return ['{0}\n{1}\n'.format(cell.source, underline)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        rendered = ['In[%s]:' % cell.prompt_number, '']
        rendered += rst_directive('.. code:: python', cell.input)

        # Each output is rendered by the method matching its output type.
        for out in cell.outputs:
            render = self.dispatch(out.output_type)
            rendered += render(out)

        return rendered

    @DocInherit
    def render_markdown(self, cell):
        # The markdown source is passed through unchanged.
        return [cell.source]

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        # Literal block: '::' marker, empty line, indented text.
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        rendered = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            rendered += rst_directive('.. math::', output.latex)
        if 'text' in output:
            rendered += rst_directive('.. parsed-literal::', output.text)

        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        frames = '\n'.join(output.traceback)
        return ['::', '', indent(remove_ansi(frames)), '']

    @DocInherit
    def _img_lines(self, img_file):
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        if 'text' not in output:
            return []
        return list(rst_directive('.. parsed-literal::', output.text))

    @DocInherit
    def _unknown_lines(self, data):
        warning = rst_directive('.. warning:: Unknown cell')
        return warning + [data]
344
350
345
351
class ConverterQuickHTML(Converter):
    """Converter writing HTML ('.html') directly from the notebook."""
    extension = 'html'

    def in_tag(self, tag, src):
        """Return a list of elements bracketed by the given tag"""
        opening = '<%s>' % tag
        closing = '</%s>' % tag
        return [opening, src, closing]

    def optional_header(self):
        """Return the lines opening the HTML document."""
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        """Return the lines closing the HTML document."""
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        # The heading level is used directly as the <hN> tag number.
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, cell.level)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        # One table per cell: a row for the input, then a row per output.
        rows = [
            '<table>',
            '<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number,
            "<br>\n".join(cell.input.splitlines()),
            '</tt></td></tr>',
        ]

        for out in cell.outputs:
            rows.append('<tr><td></td><td>')
            render = self.dispatch(out.output_type)
            rows.extend(render(out))
            rows.append('</td></tr>')

        rows.append('</table>')
        return rows

    @DocInherit
    def render_markdown(self, cell):
        # Markdown is not interpreted, only shown preformatted.
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_pyout(self, output):
        prompt = '<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' % \
            output.prompt_number
        rows = [prompt, '<td>']

        # output is a dictionary like object with type as a key
        for key in ('text', 'latex'):
            if key in output:
                rows += self.in_tag('pre', indent(output[key]))

        return rows

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        frames = '\n'.join(output.traceback)
        return self.in_tag('pre', remove_ansi(frames))

    @DocInherit
    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        # The stream text is emitted as is, without any markup.
        return [output.text] if 'text' in output else []

    @DocInherit
    def _unknown_lines(self, data):
        title = '<h2>Warning:: Unknown cell</h2>'
        return [title] + self.in_tag('pre', data)
437
443
438
444
439 class ConverterLaTeX(Converter):
445 class ConverterLaTeX(Converter):
440 """Converts a notebook to a .tex file suitable for pdflatex.
446 """Converts a notebook to a .tex file suitable for pdflatex.
441
447
442 Note: this converter *needs*:
448 Note: this converter *needs*:
443
449
444 - `pandoc`: for all conversion of markdown cells. If your notebook only
450 - `pandoc`: for all conversion of markdown cells. If your notebook only
445 has Raw cells, pandoc will not be needed.
451 has Raw cells, pandoc will not be needed.
446
452
447 - `inkscape`: if your notebook has SVG figures. These need to be
453 - `inkscape`: if your notebook has SVG figures. These need to be
448 converted to PDF before inclusion in the TeX file, as LaTeX doesn't
454 converted to PDF before inclusion in the TeX file, as LaTeX doesn't
449 understand SVG natively.
455 understand SVG natively.
450
456
451 You will in general obtain much better final PDF results if you configure
457 You will in general obtain much better final PDF results if you configure
452 the matplotlib backend to create SVG output with
458 the matplotlib backend to create SVG output with
453
459
454 %config InlineBackend.figure_format = 'svg'
460 %config InlineBackend.figure_format = 'svg'
455
461
456 (or set the equivalent flag at startup or in your configuration profile).
462 (or set the equivalent flag at startup or in your configuration profile).
457 """
463 """
458 extension = 'tex'
464 extension = 'tex'
459 documentclass = 'article'
465 documentclass = 'article'
460 documentclass_options = '11pt,english'
466 documentclass_options = '11pt,english'
461 heading_map = {1: r'\section',
467 heading_map = {1: r'\section',
462 2: r'\subsection',
468 2: r'\subsection',
463 3: r'\subsubsection',
469 3: r'\subsubsection',
464 4: r'\paragraph',
470 4: r'\paragraph',
465 5: r'\subparagraph',
471 5: r'\subparagraph',
466 6: r'\subparagraph'}
472 6: r'\subparagraph'}
467
473
468 def in_env(self, environment, lines):
474 def in_env(self, environment, lines):
469 """Return list of environment lines for input lines
475 """Return list of environment lines for input lines
470
476
471 Parameters
477 Parameters
472 ----------
478 ----------
473 env : string
479 env : string
474 Name of the environment to bracket with begin/end.
480 Name of the environment to bracket with begin/end.
475
481
476 lines: """
482 lines: """
477 out = [r'\begin{%s}' % environment]
483 out = [r'\begin{%s}' % environment]
478 if isinstance(lines, basestring):
484 if isinstance(lines, basestring):
479 out.append(lines)
485 out.append(lines)
480 else: # list
486 else: # list
481 out.extend(lines)
487 out.extend(lines)
482 out.append(r'\end{%s}' % environment)
488 out.append(r'\end{%s}' % environment)
483 return out
489 return out
484
490
485 def convert(self):
491 def convert(self):
486 # The main body is done by the logic in the parent class, and that's
492 # The main body is done by the logic in the parent class, and that's
487 # all we need if preamble support has been turned off.
493 # all we need if preamble support has been turned off.
488 body = super(ConverterLaTeX, self).convert()
494 body = super(ConverterLaTeX, self).convert()
489 if not self.with_preamble:
495 if not self.with_preamble:
490 return body
496 return body
491 # But if preamble is on, then we need to construct a proper, standalone
497 # But if preamble is on, then we need to construct a proper, standalone
492 # tex file.
498 # tex file.
493
499
494 # Tag the document at the top and set latex class
500 # Tag the document at the top and set latex class
495 final = [ r'%% This file was auto-generated by IPython, do NOT edit',
501 final = [ r'%% This file was auto-generated by IPython, do NOT edit',
496 r'%% Conversion from the original notebook file:',
502 r'%% Conversion from the original notebook file:',
497 r'%% {0}'.format(self.infile),
503 r'%% {0}'.format(self.infile),
498 r'%%',
504 r'%%',
499 r'\documentclass[%s]{%s}' % (self.documentclass_options,
505 r'\documentclass[%s]{%s}' % (self.documentclass_options,
500 self.documentclass),
506 self.documentclass),
501 '',
507 '',
502 ]
508 ]
503 # Load our own preamble, which is stored next to the main file. We
509 # Load our own preamble, which is stored next to the main file. We
504 # need to be careful in case the script entry point is a symlink
510 # need to be careful in case the script entry point is a symlink
505 myfile = __file__ if not os.path.islink(__file__) else \
511 myfile = __file__ if not os.path.islink(__file__) else \
506 os.readlink(__file__)
512 os.readlink(__file__)
507 with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
513 with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
508 final.append(f.read())
514 final.append(f.read())
509
515
510 # Load any additional user-supplied preamble
516 # Load any additional user-supplied preamble
511 if self.user_preamble:
517 if self.user_preamble:
512 final.extend(['', '%% Adding user preamble from file:',
518 final.extend(['', '%% Adding user preamble from file:',
513 '%% {0}'.format(self.user_preamble), ''])
519 '%% {0}'.format(self.user_preamble), ''])
514 with open(self.user_preamble) as f:
520 with open(self.user_preamble) as f:
515 final.append(f.read())
521 final.append(f.read())
516
522
517 # Include document body
523 # Include document body
518 final.extend([ r'\begin{document}', '',
524 final.extend([ r'\begin{document}', '',
519 body,
525 body,
520 r'\end{document}', ''])
526 r'\end{document}', ''])
521 # Retun value must be a string
527 # Retun value must be a string
522 return '\n'.join(final)
528 return '\n'.join(final)
523
529
524 @DocInherit
530 @DocInherit
525 def render_heading(self, cell):
531 def render_heading(self, cell):
526 marker = self.heading_map[cell.level]
532 marker = self.heading_map[cell.level]
527 return ['%s{%s}' % (marker, cell.source) ]
533 return ['%s{%s}' % (marker, cell.source) ]
528
534
529 @DocInherit
535 @DocInherit
530 def render_code(self, cell):
536 def render_code(self, cell):
531 if not cell.input:
537 if not cell.input:
532 return []
538 return []
533
539
534 # Cell codes first carry input code, we use lstlisting for that
540 # Cell codes first carry input code, we use lstlisting for that
535 lines = [r'\begin{codecell}']
541 lines = [r'\begin{codecell}']
536
542
537 lines.extend(self.in_env('codeinput',
543 lines.extend(self.in_env('codeinput',
538 self.in_env('lstlisting', cell.input)))
544 self.in_env('lstlisting', cell.input)))
539
545
540 outlines = []
546 outlines = []
541 for output in cell.outputs:
547 for output in cell.outputs:
542 conv_fn = self.dispatch(output.output_type)
548 conv_fn = self.dispatch(output.output_type)
543 outlines.extend(conv_fn(output))
549 outlines.extend(conv_fn(output))
544
550
545 # And then output of many possible types; use a frame for all of it.
551 # And then output of many possible types; use a frame for all of it.
546 if outlines:
552 if outlines:
547 lines.extend(self.in_env('codeoutput', outlines))
553 lines.extend(self.in_env('codeoutput', outlines))
548
554
549 lines.append(r'\end{codecell}')
555 lines.append(r'\end{codecell}')
550
556
551 return lines
557 return lines
552
558
553
559
554 @DocInherit
560 @DocInherit
555 def _img_lines(self, img_file):
561 def _img_lines(self, img_file):
556 return self.in_env('center',
562 return self.in_env('center',
557 [r'\includegraphics[width=3in]{%s}' % img_file, r'\par'])
563 [r'\includegraphics[width=3in]{%s}' % img_file, r'\par'])
558
564
559 def _svg_lines(self, img_file):
565 def _svg_lines(self, img_file):
560 base_file = os.path.splitext(img_file)[0]
566 base_file = os.path.splitext(img_file)[0]
561 pdf_file = base_file + '.pdf'
567 pdf_file = base_file + '.pdf'
562 subprocess.check_call(['inkscape', '--export-pdf=%s' % pdf_file,
568 subprocess.check_call([ inkscape, '--export-pdf=%s' % pdf_file,
563 img_file])
569 img_file])
564 return self._img_lines(pdf_file)
570 return self._img_lines(pdf_file)
565
571
566 @DocInherit
572 @DocInherit
567 def render_stream(self, output):
573 def render_stream(self, output):
568 lines = []
574 lines = []
569
575
570 if 'text' in output:
576 if 'text' in output:
571 lines.extend(self.in_env('verbatim', output.text.strip()))
577 lines.extend(self.in_env('verbatim', output.text.strip()))
572
578
573 return lines
579 return lines
574
580
@DocInherit
def render_markdown(self, cell):
    # The whole markdown source is converted in one call and returned as a
    # single-element list.
    converted = markdown2latex(cell.source)
    return [converted]
578
584
@DocInherit
def render_pyout(self, output):
    collected = []

    # `output` is a dictionary-like object keyed by representation type.
    # A 'latex' entry is emitted as-is, ahead of any text representation.
    if 'latex' in output:
        collected += output.latex

    # The plain-text representation goes inside a verbatim environment.
    if 'text' in output:
        collected += self.in_env('verbatim', output.text)

    return collected
591
597
@DocInherit
def render_pyerr(self, output):
    # Note: a traceback is a *list* of frames, so join it into one string
    # before stripping the ANSI escapes.
    plain_traceback = remove_ansi('\n'.join(output.traceback))
    # Verbatim block nested inside a 'traceback' environment.
    verbatim_block = self.in_env('verbatim', plain_traceback)
    return self.in_env('traceback', verbatim_block)
598
604
@DocInherit
def render_raw(self, cell):
    # Unless raw cells are configured to be shown verbatim, their source is
    # passed through untouched as a single entry.
    if not self.raw_as_verbatim:
        return [cell.source]
    return self.in_env('verbatim', cell.source)
605
611
@DocInherit
def _unknown_lines(self, data):
    # Visible warning header, followed by the unrecognised data shown
    # verbatim.
    warning = [r'{\vspace{5mm}\bf WARNING:: unknown cell:}']
    return warning + self.in_env('verbatim', data)
610
616
611 #-----------------------------------------------------------------------------
617 #-----------------------------------------------------------------------------
612 # Standalone conversion functions
618 # Standalone conversion functions
613 #-----------------------------------------------------------------------------
619 #-----------------------------------------------------------------------------
614
620
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : str
        Path of the reST file to convert.

    Returns
    -------
    str
        Path of the html file that was written: `infile` with its extension
        replaced by '.html'.

    Raises
    ------
    IOError
        If rst2html cannot be started, or if it writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find. This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a filename
    # containing spaces or shell metacharacters reaches rst2html as one
    # literal argument.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as err:
        # Through a shell, a missing rst2html showed up as stderr output
        # and hence IOError; keep that exception type for callers.
        raise IOError("could not run rst2html: %s" % err)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state. Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag. This may only work if there's a real title defined
    # so we get a 'div class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        # `walker` resumes right after the '<body>' line found above.
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
662
668
known_formats = "rst (default), html, quick-html, latex"

def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested output `format`.

    `format` selects the converter: 'rst', 'html' (done as notebook -> rst
    -> html), 'quick-html' or 'latex'. Any other value ends the program
    with a SystemExit message listing the known formats.
    """
    # XXX: this is just quick and dirty for now. When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all error at the bottom, as well as in the help.
    if format == 'rst':
        ConverterRST(infile).render()
        return

    if format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        rst_path = ConverterRST(infile).render()
        rst2simplehtml(rst_path)
        return

    if format == 'quick-html':
        ConverterQuickHTML(infile).render()
        return

    if format == 'latex':
        ConverterLaTeX(infile).render()
        return

    raise SystemExit("Unknown format '%s', " % format +
                     "known formats are: " + known_formats)
687
693
688 #-----------------------------------------------------------------------------
694 #-----------------------------------------------------------------------------
689 # Script main
695 # Script main
690 #-----------------------------------------------------------------------------
696 #-----------------------------------------------------------------------------
691
697
if __name__ == '__main__':
    # The module docstring doubles as the command-line description; the raw
    # formatter keeps its line breaks intact in the --help output.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)

    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #cli.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    cli.add_argument('infile', nargs=1)

    format_help = 'Output format. Supported formats: \n' + known_formats
    cli.add_argument('-f', '--format', default='rst', help=format_help)

    options = cli.parse_args()
    # nargs=1 yields a one-element list, hence the [0].
    main(infile=options.infile[0], format=options.format)
General Comments 0
You need to be logged in to leave comments. Login now