nbconvert.py
1461 lines
| 43.5 KiB
| text/x-python
|
PythonLexer
Fernando Perez
|
r6220 | #!/usr/bin/env python | ||
Paul Ivanov
|
r6280 | """Convert IPython notebooks to other formats, such as ReST, and HTML. | ||
Fernando Perez
|
r6220 | |||
Paul Ivanov
|
r6280 | Example: | ||
MinRK
|
r7914 | ./nbconvert.py --format rst file.ipynb | ||
Fernando Perez
|
r6220 | |||
MinRK
|
r7914 | Produces 'file.rst', along with auto-generated figure files named | ||
file_fig_NN.<ext>, which are written to the 'file_files' directory. | ||||
Fernando Perez
|
r6220 | """ | ||
Fernando Perez
|
r6677 | #----------------------------------------------------------------------------- | ||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r6671 | from __future__ import print_function | ||
Fernando Perez
|
r6220 | |||
Fernando Perez
|
r6677 | # Stdlib | ||
Fernando Perez
|
r6672 | import codecs | ||
MinRK
|
r7914 | import io | ||
Fernando Perez
|
r6674 | import logging | ||
Fernando Perez
|
r6220 | import os | ||
Fernando Perez
|
r6671 | import pprint | ||
import re | ||||
Fernando Perez
|
r6220 | import subprocess | ||
import sys | ||||
Jonathan Taylor
|
r7366 | import json | ||
import copy | ||||
from shutil import rmtree | ||||
Matthias BUSSONNIER
|
r8163 | from markdown import markdown | ||
Fernando Perez
|
r6671 | |||
Matthias BUSSONNIER
|
# Name (or full path) of the Inkscape executable; None when the OS X
# application bundle is not installed.
# NOTE(review): the nesting of the existence check under the darwin branch
# is reconstructed from a paste that lost its indentation -- confirm.
if sys.platform != 'darwin':
    inkscape = 'inkscape'
else:
    inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    if not os.path.exists(inkscape):
        inkscape = None
Fernando Perez
|
r6677 | # From IPython | ||
Paul Ivanov
|
r6262 | from IPython.external import argparse | ||
Fernando Perez
|
r6220 | from IPython.nbformat import current as nbformat | ||
Paul Ivanov
|
r6257 | from IPython.utils.text import indent | ||
Jonathan Taylor
|
r7366 | from IPython.nbformat.v3.nbjson import BytesEncoder | ||
MinRK
|
r7914 | from IPython.utils import path, py3compat | ||
# local | ||||
from decorators import DocInherit | ||||
from lexers import IPythonLexer | ||||
Fernando Perez
|
r6220 | |||
Fernando Perez
|
r6677 | #----------------------------------------------------------------------------- | ||
# Utility functions | ||||
#----------------------------------------------------------------------------- | ||||
Jonathan Taylor
|
def DocInherit(f):
    """Identity decorator: hand back `f` untouched.

    This definition rebinds the `DocInherit` name imported from the local
    `decorators` module, so every `@DocInherit` below it is a no-op.
    """
    return f
Fernando Perez
|
def remove_fake_files_url(cell):
    """Strip every '/files/' pseudo-path from ``cell.source``, in place.

    The cell is modified directly; nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')
MinRK
|
r7914 | # ANSI color functions: | ||
Fernando Perez
|
def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Any SGR sequence of the form ``ESC [ <digits and semicolons> m`` is
    removed, e.g. ``ESC[0m``, ``ESC[31m``, ``ESC[0;31m``, ``ESC[01;34m``
    and the bare reset ``ESC[m``.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
    """
    return re.sub(r'\x1b\[[0-9;]*m', '', src)
Fernando Perez
|
r6677 | |||
MinRK
|
def ansi2html(txt):
    """Render ANSI colors as HTML colors

    This is equivalent to util.fixConsole in utils.js

    The text is HTML-escaped first, then every ANSI SGR escape sequence is
    replaced by a ``<span class="...">`` carrying the matching ``ansi*`` CSS
    classes. A sequence with no known color code (for instance a reset)
    only closes the span that is currently open.

    Parameters
    ----------
    txt : string

    Returns
    -------
    string
    """
    ansi_colormap = {
        '30': 'ansiblack',
        '31': 'ansired',
        '32': 'ansigreen',
        '33': 'ansiyellow',
        '34': 'ansiblue',
        '35': 'ansipurple',
        '36': 'ansicyan',
        '37': 'ansigrey',
        '01': 'ansibold',
    }

    # Escape the ampersand first so that the entities produced below are not
    # themselves escaped a second time.
    txt = txt.replace('&', '&amp;')
    html_escapes = (
        ('<', '&lt;'),
        ('>', '&gt;'),
        ("'", '&apos;'),
        ('"', '&quot;'),
        ('`', '&#96;'),
    )
    for char, escape in html_escapes:
        txt = txt.replace(char, escape)

    ansi_re = re.compile('\x1b' + r'\[([\dA-Fa-f;]*?)m')
    # Mutable cell instead of ``nonlocal`` so this also runs on Python 2.
    state = {'opened': False}

    def _replace(match):
        # Close whatever span the previous escape sequence opened.
        closer = '</span>' if state['opened'] else ''
        classes = [ansi_colormap[cmd]
                   for cmd in match.group(1).split(';')
                   if cmd in ansi_colormap]
        # A span is open only if an opening tag is actually emitted;
        # otherwise a later '</span>' would have no matching opener.
        state['opened'] = bool(classes)
        opener = '<span class="%s">' % ' '.join(classes) if classes else ''
        return closer + opener

    txt = ansi_re.sub(_replace, txt)
    if state['opened']:
        txt += '</span>'
    return txt
Fernando Perez
|
r6671 | # Pandoc-dependent code | ||
MinRK
|
r7914 | |||
Fernando Perez
|
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.
    pandoc's stderr is not captured: it is inherited from this process, so
    any error messages pandoc generates appear directly on stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so the second item of the result is always None.
    out, _ = p.communicate(src.encode('utf-8'))
    return out.decode('utf-8')
Fernando Perez
|
r6220 | |||
Fernando Perez
|
def markdown2rst(src):
    """Convert a markdown string to reStructuredText via pandoc.

    This function will raise an error if pandoc is not installed.
    pandoc's stderr is not captured: it is inherited from this process, so
    any error messages pandoc generates appear directly on stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'rst'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so the second item of the result is always None.
    out, _ = p.communicate(src.encode('utf-8'))
    return out.decode('utf-8')
Fernando Perez
|
def rst_directive(directive, text=''):
    """Return the lines of a reST directive, with an optional indented body.

    The result is ``[directive, '']``; when `text` is non-empty the indented
    text and another blank line are appended.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
Fernando Perez
|
r6220 | |||
MinRK
|
r7914 | |||
def coalesce_streams(outputs):
    """merge consecutive sequences of stream output into single stream

    to prevent extra newlines inserted at flush calls

    Only *adjacent* outputs that are both of type 'stream' and share the
    same stream name are merged; the relative order of everything else is
    preserved. Merging is done in place: the text of a follow-up stream
    output is appended to the ``text`` of the output that precedes it.

    Parameters
    ----------
    outputs : list
        Output nodes exposing ``output_type`` and, for streams, ``stream``
        and ``text`` attributes.

    Returns
    -------
    list
        New list holding the surviving output nodes (empty for no input).

    TODO: handle \\r deletion
    """
    if not outputs:
        return []

    last = outputs[0]
    new_outputs = [last]
    for output in outputs[1:]:
        if (output.output_type == 'stream' and
                last.output_type == 'stream' and
                last.stream == output.stream):
            last.text += output.text
        else:
            new_outputs.append(output)
            # Track the most recent kept output so that only truly
            # consecutive stream chunks get merged.
            last = output
    return new_outputs
Fernando Perez
|
r6677 | #----------------------------------------------------------------------------- | ||
# Class declarations | ||||
#----------------------------------------------------------------------------- | ||||
Anton I. Sipos
|
r6261 | |||
Paul Ivanov
|
class ConversionException(Exception):
    """Exception type declared for notebook-conversion errors."""
Anton I. Sipos
|
r6253 | |||
Paul Ivanov
|
class Converter(object):
    """Base class for notebook converters.

    A converter walks every cell of every worksheet in ``self.nb`` and asks
    a ``render_<cell_type>`` method for the list of output lines of that
    cell. Subclasses set `extension` and implement the ``render_*``,
    ``render_display_format_*``, ``_img_lines`` and ``_unknown_lines``
    hooks, which raise NotImplementedError here.
    """
    default_encoding = 'utf-8'
    extension = ''
    figures_counter = 0
    infile = ''
    infile_dir = ''
    infile_root = ''
    files_dir = ''
    with_preamble = True
    user_preamble = None
    output = ''
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and create the ``<root>_files`` directory.

        Parameters
        ----------
        infile : str
            Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        files_dir = os.path.join(self.infile_dir, infile_root + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        """Remove the figures directory if nothing was written into it."""
        files_dir = self.files_dir
        # Best-effort cleanup: the directory may never have been created
        # (``__init__`` failed or was bypassed) or may already be gone, and
        # an exception escaping ``__del__`` would only produce noise.
        try:
            if files_dir and not os.listdir(files_dir):
                os.rmdir(files_dir)
        except OSError:
            pass

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to `render_unknown` when no such method exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """return output_type dependent render method, for example
        render_display_format_text

        Falls back to `render_unknown_display` when no such method exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """Convert ``self.nb`` and return the result as a single string.

        Parameters
        ----------
        cell_separator : str
            String inserted between the rendered text of consecutive cells.
        """
        lines = []
        lines.extend(self.optional_header())
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        lines.extend(cell_lines)
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute output path.

        Parameters
        ----------
        outfile : str, optional
            Destination path; defaults to ``self.outbase + '.' + extension``.
        encoding : str, optional
            Text encoding; defaults to `default_encoding`.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """Return the list of lines emitted before the cells (none here)."""
        return []

    def optional_footer(self):
        """Return the list of lines emitted after the cells (none here)."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is named ``<infile_root>_fig_NN.<fmt>`` and written inside
        ``self.files_dir``; `figures_counter` is incremented.

        Returns the path of the new file (``self.files_dir`` joined with
        the figure name).
        """
        import base64

        figname = '%s_fig_%02i.%s' % (self.infile_root,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Image formats are written to figure files and included through
        ``_<fmt>_lines`` (or `_img_lines`); every other key except
        'output_type' is rendered by its ``render_display_format_<key>``
        method.

        Returns list.
        """
        lines = []

        for fmt in output.keys():
            if fmt in ['png', 'svg', 'jpg', 'pdf']:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))
            elif fmt != 'output_type':
                conv_fn = self.dispatch_display_format(fmt)
                lines.extend(conv_fn(output))
        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        `type` is accepted for backward compatibility and is not used; it
        is optional because the display-format dispatcher calls its render
        methods with the output node only.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
Paul Ivanov
|
r6249 | |||
Paul Ivanov
|
class ConverterRST(Converter):
    """Converter that renders notebook cells as reStructuredText lines."""
    extension = 'rst'
    # Underline character for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # Title followed by an underline of the same length.
        title = cell.source
        underline = self.heading_level[cell.level] * len(title)
        return ['{0}\n{1}\n'.format(title, underline)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        rendered = ['In[%s]:' % cell.prompt_number, '']
        rendered.extend(rst_directive('.. code:: python', cell.input))
        for output in cell.outputs:
            render_output = self.dispatch(output.output_type)
            rendered.extend(render_output(output))
        return rendered

    @DocInherit
    def render_markdown(self, cell):
        converted = markdown2rst(cell.source)
        return [converted]

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        # Literal block: '::' marker, blank line, indented body.
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        rendered = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            rendered.extend(rst_directive('.. math::', output.latex))
        if 'text' in output:
            rendered.extend(rst_directive('.. parsed-literal::', output.text))
        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return ['::', '', indent(traceback_text), '']

    @DocInherit
    def _img_lines(self, img_file):
        image_directive = '.. image:: %s' % img_file
        return [image_directive, '']

    @DocInherit
    def render_display_format_text(self, output):
        return rst_directive('.. parsed-literal::', output.text)

    @DocInherit
    def _unknown_lines(self, data):
        warning = rst_directive('.. warning:: Unknown cell')
        return warning + [data]

    @DocInherit
    def render_display_format_html(self, output):
        return rst_directive('.. raw:: html', output.html)

    @DocInherit
    def render_display_format_latex(self, output):
        return rst_directive('.. math::', output.latex)

    @DocInherit
    def render_display_format_json(self, output):
        return rst_directive('.. raw:: json', output.json)

    @DocInherit
    def render_display_format_javascript(self, output):
        return rst_directive('.. raw:: javascript', output.javascript)
Fernando Perez
|
r6673 | |||
Fernando Perez
|
r7871 | |||
MinRK
|
def highlight(src, lang='ipython'):
    """Return `src` syntax-highlighted as HTML by pygments.

    The IPython lexer is used for ``lang='ipython'``; any other value is
    looked up with pygments' ``get_lexer_by_name``.
    """
    from pygments import highlight as pygments_highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_by_name

    if lang != 'ipython':
        lexer = get_lexer_by_name(lang, stripall=True)
    else:
        lexer = IPythonLexer()
    return pygments_highlight(src, lexer, HtmlFormatter())
Fernando Perez
|
class ConverterMarkdown(Converter):
    """Converter that renders notebook cells as Markdown lines."""
    extension = 'md'

    def __init__(self, infile, highlight_source=True, show_prompts=False,
                 inline_prompt=False):
        """Store the rendering options on top of the base-class setup.

        highlight_source : run code through `highlight` instead of indenting.
        show_prompts : emit In[n]/Out[n] prompts.
        inline_prompt : put the In[n] prompt on the first line of the code
            rather than on a line of its own.
        """
        super(ConverterMarkdown, self).__init__(infile)
        self.highlight_source = highlight_source
        self.show_prompts = show_prompts
        self.inline_prompt = inline_prompt

    @DocInherit
    def render_heading(self, cell):
        hashes = '#' * cell.level
        return ['{0} {1}'.format(hashes, cell.source), '']

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        separate_prompts = self.show_prompts and not self.inline_prompt
        rendered = []
        if separate_prompts:
            rendered.extend(['*In[%s]:*' % cell.prompt_number, ''])

        if self.show_prompts and self.inline_prompt:
            # Prompt goes on the first line; the remaining lines are
            # indented by the width of the prompt.
            prompt = 'In[%s]: ' % cell.prompt_number
            first_line, _, remainder = cell.input.partition('\n')
            src = (prompt + first_line + '\n' +
                   indent(remainder, nspaces=len(prompt)))
        else:
            src = cell.input

        if self.highlight_source:
            src = highlight(src)
        else:
            src = indent(src)
        rendered.extend([src, ''])

        if cell.outputs and separate_prompts:
            rendered.extend(['*Out[%s]:*' % cell.prompt_number, ''])
        for output in cell.outputs:
            render_output = self.dispatch(output.output_type)
            rendered.extend(render_output(output))

        rendered.append('')
        return rendered

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source, '']

    @DocInherit
    def render_raw(self, cell):
        body = indent(cell.source) if self.raw_as_verbatim else cell.source
        return [body, '']

    @DocInherit
    def render_pyout(self, output):
        # output is a dictionary like object with type as a key.
        # Only the 'text' entry is rendered; a 'latex' entry is ignored.
        rendered = []
        if 'text' in output:
            rendered.extend(['<pre>', indent(output.text), '</pre>'])
        rendered.append('')
        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return [indent(traceback_text), '']

    @DocInherit
    def _img_lines(self, img_file):
        image_markup = '![](%s)' % img_file
        return ['', image_markup, '']

    @DocInherit
    def render_display_format_text(self, output):
        return [indent(output.text)]

    @DocInherit
    def _unknown_lines(self, data):
        return ['Warning: Unknown cell', data]

    @DocInherit
    def render_display_format_html(self, output):
        return [output.html]

    @DocInherit
    def render_display_format_latex(self, output):
        return ['LaTeX::', indent(output.latex)]

    @DocInherit
    def render_display_format_json(self, output):
        return ['JSON:', indent(output.json)]

    @DocInherit
    def render_display_format_javascript(self, output):
        return ['JavaScript:', indent(output.javascript)]
Fernando Perez
|
def return_list(x):
    """Return `x` itself when it is a list, otherwise wrap it in a new list."""
    if isinstance(x, list):
        return x
    return [x]
MinRK
|
r7914 | # decorators for HTML output | ||
def output_container(f):
    """add a prompt-area next to an output

    Decorator for ``render_*(self, output)`` methods that return a list of
    HTML lines. The rendered lines are wrapped in an "output_area" div that
    starts with ``self._out_prompt(output)`` followed by an "output_subarea"
    div whose CSS classes name the output type (and the stream name for
    stream outputs). An empty rendering yields an empty list.
    """
    import functools

    # Keep the wrapped method's name and docstring on the wrapper.
    @functools.wraps(f)
    def wrapped(self, output):
        rendered = f(self, output)
        if not rendered:
            # empty output
            return []
        lines = []
        lines.append('<div class="hbox output_area">')
        lines.extend(self._out_prompt(output))
        classes = "output_subarea output_%s" % output.output_type
        if output.output_type == 'stream':
            classes += " output_%s" % output.stream
        lines.append('<div class="%s">' % classes)
        lines.extend(rendered)
        lines.append('</div>')  # subarea
        lines.append('</div>')  # output_area
        return lines

    return wrapped
def text_cell(f):
    """wrap text cells in appropriate divs

    Decorator for ``render_*(self, cell)`` methods that return a list of
    HTML lines; the list is enclosed in a div carrying the text-cell CSS
    classes.
    """
    import functools

    # Keep the wrapped method's name and docstring on the wrapper.
    @functools.wraps(f)
    def wrapped(self, cell):
        rendered = f(self, cell)
        classes = "text_cell_render border-box-sizing rendered_html"
        lines = ['<div class="%s">' % classes] + rendered + ['</div>']
        return lines

    return wrapped
class ConverterHTML(Converter): | ||||
Paul Ivanov
|
r6267 | extension = 'html' | ||
Fernando Perez
|
r8423 | def in_tag(self, tag, src, attrs=None): | ||
Fernando Perez
|
r6674 | """Return a list of elements bracketed by the given tag""" | ||
Fernando Perez
|
r8432 | attr_s = '' if attrs is None else \ | ||
' '.join( "%s=%s" % (attr, value) | ||||
Fernando Perez
|
r8423 | for attr, value in attrs.iteritems() ) | ||
MinRK
|
r7914 | return ['<%s %s>' % (tag, attr_s), src, '</%s>' % tag] | ||
def _ansi_colored(self, text): | ||||
return ['<pre>%s</pre>' % ansi2html(text)] | ||||
def _stylesheet(self, fname): | ||||
with io.open(fname, encoding='utf-8') as f: | ||||
s = f.read() | ||||
Fernando Perez
|
r8432 | return self.in_tag('style', s, dict(type='"text/css"')) | ||
MinRK
|
r7914 | |||
def _out_prompt(self, output): | ||||
if output.output_type == 'pyout': | ||||
n = output.prompt_number if output.prompt_number is not None else ' ' | ||||
content = 'Out [%s]:' % n | ||||
else: | ||||
content = '' | ||||
return ['<div class="prompt output_prompt">%s</div>' % content] | ||||
Fernando Perez
|
r6674 | |||
Fernando Perez
|
r8432 | def header_body(self): | ||
"""Return the body of the header as a list of strings.""" | ||||
MinRK
|
r7914 | |||
Fernando Perez
|
r8432 | from pygments.formatters import HtmlFormatter | ||
header = [] | ||||
MinRK
|
r7914 | static = os.path.join(path.get_ipython_package_dir(), | ||
'frontend', 'html', 'notebook', 'static', | ||||
) | ||||
here = os.path.split(os.path.abspath(__file__))[0] | ||||
css = os.path.join(static, 'css') | ||||
for sheet in [ | ||||
# do we need jquery and prettify? | ||||
# os.path.join(static, 'jquery', 'css', 'themes', 'base', 'jquery-ui.min.css'), | ||||
# os.path.join(static, 'prettify', 'prettify.css'), | ||||
os.path.join(css, 'boilerplate.css'), | ||||
os.path.join(css, 'fbm.css'), | ||||
os.path.join(css, 'notebook.css'), | ||||
os.path.join(css, 'renderedhtml.css'), | ||||
# our overrides: | ||||
os.path.join(here, 'css', 'static_html.css'), | ||||
]: | ||||
header.extend(self._stylesheet(sheet)) | ||||
# pygments css | ||||
pygments_css = HtmlFormatter().get_style_defs('.highlight') | ||||
Matthias BUSSONNIER
|
r8174 | header.extend(['<meta charset="UTF-8">']) | ||
Fernando Perez
|
r8432 | header.extend(self.in_tag('style', pygments_css, dict(type='"text/css"'))) | ||
MinRK
|
r7914 | |||
# TODO: this should be allowed to use local mathjax: | ||||
Fernando Perez
|
r8432 | header.extend(self.in_tag('script', '', {'type':'"text/javascript"', | ||
MinRK
|
r7914 | 'src': '"https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS_HTML"', | ||
})) | ||||
Fernando Perez
|
r8432 | with io.open(os.path.join(here, 'js', 'initmathjax.js'), | ||
encoding='utf-8') as f: | ||||
Fernando Perez
|
r8423 | header.extend(self.in_tag('script', f.read(), | ||
{'type': '"text/javascript"'})) | ||||
MinRK
|
r7914 | return header | ||
Paul Ivanov
|
r6267 | |||
Fernando Perez
|
r8432 | def optional_header(self): | ||
return ['<html>', '<head>'] + self.header_body() + \ | ||||
['</head>', '<body>'] | ||||
Paul Ivanov
|
r6267 | def optional_footer(self): | ||
Fernando Perez
|
r8432 | return ['</body>', '</html>'] | ||
Paul Ivanov
|
r6267 | |||
@DocInherit | ||||
MinRK
|
r7914 | @text_cell | ||
Paul Ivanov
|
r6267 | def render_heading(self, cell): | ||
marker = cell.level | ||||
MinRK
|
r7914 | return [u'<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)] | ||
Paul Ivanov
|
r6267 | @DocInherit | ||
def render_code(self, cell): | ||||
if not cell.input: | ||||
return [] | ||||
MinRK
|
r7914 | lines = ['<div class="cell border-box-sizing code_cell vbox">'] | ||
lines.append('<div class="input hbox">') | ||||
n = cell.prompt_number if getattr(cell, 'prompt_number', None) is not None else ' ' | ||||
lines.append('<div class="prompt input_prompt">In [%s]:</div>' % n) | ||||
lines.append('<div class="input_area box-flex1">') | ||||
lines.append(highlight(cell.input)) | ||||
lines.append('</div>') # input_area | ||||
lines.append('</div>') # input | ||||
if cell.outputs: | ||||
lines.append('<div class="vbox output_wrapper">') | ||||
lines.append('<div class="output vbox">') | ||||
for output in coalesce_streams(cell.outputs): | ||||
conv_fn = self.dispatch(output.output_type) | ||||
lines.extend(conv_fn(output)) | ||||
lines.append('</div>') # output | ||||
lines.append('</div>') # output_wrapper | ||||
lines.append('</div>') # cell | ||||
Paul Ivanov
|
r6267 | return lines | ||
@DocInherit | ||||
MinRK
|
r7914 | @text_cell | ||
Paul Ivanov
|
r6267 | def render_markdown(self, cell): | ||
Matthias BUSSONNIER
|
r8163 | return [markdown(cell.source)] | ||
Paul Ivanov
|
r6267 | |||
@DocInherit | ||||
Fernando Perez
|
r6674 | def render_raw(self, cell): | ||
if self.raw_as_verbatim: | ||||
return self.in_tag('pre', cell.source) | ||||
else: | ||||
return [cell.source] | ||||
Paul Ivanov
|
r6267 | |||
@DocInherit | ||||
MinRK
|
r7914 | @output_container | ||
Paul Ivanov
|
r6267 | def render_pyout(self, output): | ||
MinRK
|
r7914 | for fmt in ['html', 'latex', 'png', 'jpeg', 'svg', 'text']: | ||
if fmt in output: | ||||
conv_fn = self.dispatch_display_format(fmt) | ||||
return conv_fn(output) | ||||
return [] | ||||
Paul Ivanov
|
r6267 | |||
MinRK
|
r7914 | render_display_data = render_pyout | ||
Paul Ivanov
|
r6267 | |||
MinRK
|
r7914 | @DocInherit | ||
@output_container | ||||
def render_stream(self, output): | ||||
return self._ansi_colored(output.text) | ||||
Paul Ivanov
|
r6267 | |||
@DocInherit | ||||
MinRK
|
r7914 | @output_container | ||
Fernando Perez
|
r6674 | def render_pyerr(self, output): | ||
# Note: a traceback is a *list* of frames. | ||||
MinRK
|
r7914 | # lines = [] | ||
# stb = | ||||
return self._ansi_colored('\n'.join(output.traceback)) | ||||
Fernando Perez
|
r6674 | |||
@DocInherit
def _img_lines(self, img_file):
    # Two chunks: the opening tag pointing at the image file, followed by
    # the closing tag.
    opening_tag = '<img src="%s">' % img_file
    return [opening_tag, '</img>']
Anton I. Sipos
|
r6253 | |||
Fernando Perez
|
@DocInherit
def _unknown_lines(self, data):
    # A warning heading, followed by the offending data wrapped in a
    # <pre> tag.
    heading = ['<h2>Warning:: Unknown cell</h2>']
    pre_block = self.in_tag('pre', data)
    return heading + pre_block
Fernando Perez
|
r6671 | |||
Paul Ivanov
|
@DocInherit
def render_display_format_png(self, output):
    # The PNG payload is interpolated as-is into a base64 data URI, so the
    # image is embedded in the page rather than referenced as a file.
    img_template = '<img src="data:image/png;base64,%s"></img>'
    return [img_template % output.png]
Paul Ivanov
|
@DocInherit
def render_display_format_svg(self, output):
    # The SVG payload is emitted verbatim as a single chunk.
    svg_markup = output.svg
    return [svg_markup]
Paul Ivanov
|
@DocInherit
def render_display_format_jpeg(self, output):
    # The JPEG payload is interpolated as-is into a base64 data URI, so
    # the image is embedded in the page rather than referenced as a file.
    img_template = '<img src="data:image/jpeg;base64,%s"></img>'
    return [img_template % output.jpeg]
Paul Ivanov
|
@DocInherit
def render_display_format_text(self, output):
    # Plain-text output takes the same `_ansi_colored` path as stream
    # output does.
    plain_text = output.text
    return self._ansi_colored(plain_text)
Jonathan Taylor
|
r7372 | |||
Paul Ivanov
|
@DocInherit
def render_display_format_html(self, output):
    # HTML output is already in the target format: emit it untouched.
    html_fragment = output.html
    return [html_fragment]
Jonathan Taylor
|
r7372 | |||
Paul Ivanov
|
@DocInherit
def render_display_format_latex(self, output):
    # The LaTeX payload is placed in the page unmodified, as one chunk.
    latex_source = output.latex
    return [latex_source]
Jonathan Taylor
|
r7372 | |||
Paul Ivanov
|
@DocInherit
def render_display_format_json(self, output):
    # JSON payloads have no HTML rendering: contribute no lines at all.
    return list()
Paul Ivanov
|
@DocInherit
def render_display_format_javascript(self, output):
    # The javascript payload is emitted exactly as stored in the output.
    script_source = output.javascript
    return [script_source]
Jonathan Taylor
|
r7372 | |||
Jonathan Taylor
|
r7374 | |||
Fernando Perez
|
class ConverterBloggerHTML(ConverterHTML):
    """Convert a notebook to html suitable for easy pasting into Blogger.

    The main output file holds *only* the pure HTML contents; all header
    contents go to a separate file whose name has `_header` appended.

    Typically, the header file only needs to be used once when setting up a
    blog, as the CSS for all posts is stored in a single location in Blogger.
    """

    def optional_header(self):
        """Write the header body to ``<outbase>_header.html``.

        Returns an empty list, so no header lines end up in the main
        document.
        """
        header_path = self.outbase + '_header.html'
        with io.open(header_path, 'w',
                     encoding=self.default_encoding) as header_file:
            header_file.write('\n'.join(self.header_body()))
        return []

    def optional_footer(self):
        """Return no footer lines for the main document."""
        return []
Fernando Perez
|
class ConverterLaTeX(Converter):
    """Converts a notebook to a .tex file suitable for pdflatex.

    Note: this converter *needs*:

    - `pandoc`: for all conversion of markdown cells.  If your notebook only
      has Raw cells, pandoc will not be needed.

    - `inkscape`: if your notebook has SVG figures.  These need to be
      converted to PDF before inclusion in the TeX file, as LaTeX doesn't
      understand SVG natively.

    You will in general obtain much better final PDF results if you configure
    the matplotlib backend to create SVG output with

        %config InlineBackend.figure_format = 'svg'

    (or set the equivalent flag at startup or in your configuration profile).
    """
    extension = 'tex'
    documentclass = 'article'
    documentclass_options = '11pt,english'
    # Notebook heading level -> LaTeX sectioning command.  Levels 5 and 6
    # both map to \subparagraph.
    heading_map = {1: r'\section',
                   2: r'\subsection',
                   3: r'\subsubsection',
                   4: r'\paragraph',
                   5: r'\subparagraph',
                   6: r'\subparagraph'}

    def in_env(self, environment, lines):
        """Return list of environment lines for input lines

        Parameters
        ----------
        environment : string
          Name of the environment to bracket with begin/end.

        lines : string or iterable of strings
          Content placed between the begin/end lines.  A single string is
          inserted as one entry; any other iterable is spliced in item by
          item.

        Returns
        -------
        list
          ``\\begin{environment}``, the content, then ``\\end{environment}``.
        """
        # `basestring` only exists on Python 2; fall back to `str` so the
        # string check keeps working on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        # u'\\...' is the same text as the former ur'\...' literals, but it
        # is valid syntax on Python 3 as well.
        out = [u'\\begin{%s}' % environment]
        if isinstance(lines, string_types):
            out.append(lines)
        else:  # list
            out.extend(lines)
        out.append(u'\\end{%s}' % environment)
        return out

    def convert(self):
        """Return the converted notebook as a single string.

        Without preamble support this is just the body produced by the
        parent class.  With it, the body is wrapped into a standalone
        document: a generated-file notice, the document class line, the
        `preamble.tex` stored next to this module, the optional user
        preamble, and the document environment around the body.
        """
        # The main body is done by the logic in the parent class, and that's
        # all we need if preamble support has been turned off.
        body = super(ConverterLaTeX, self).convert()
        if not self.with_preamble:
            return body
        # But if preamble is on, then we need to construct a proper,
        # standalone tex file.

        # Tag the document at the top and set latex class
        final = [r'%% This file was auto-generated by IPython, do NOT edit',
                 r'%% Conversion from the original notebook file:',
                 r'%% {0}'.format(self.infile),
                 r'%%',
                 r'\documentclass[%s]{%s}' % (self.documentclass_options,
                                              self.documentclass),
                 '',
                 ]
        # Load our own preamble, which is stored next to the main file.  The
        # script entry point may be a symlink; realpath resolves it to an
        # absolute path, whereas os.readlink may hand back a target that is
        # relative to the link's own directory and would then be looked up
        # against the current working directory.
        myfile = os.path.realpath(__file__)
        with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
            final.append(f.read())

        # Load any additional user-supplied preamble
        if self.user_preamble:
            final.extend(['', '%% Adding user preamble from file:',
                          '%% {0}'.format(self.user_preamble), ''])
            with open(self.user_preamble) as f:
                final.append(f.read())

        # Include document body
        final.extend([r'\begin{document}', '',
                      body,
                      r'\end{document}', ''])
        # Return value must be a string
        return '\n'.join(final)

    @DocInherit
    def render_heading(self, cell):
        # Look up the sectioning command for this heading level and wrap
        # the heading text in it.
        marker = self.heading_map[cell.level]
        return ['%s{%s}' % (marker, cell.source)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # Cell codes first carry input code, we use lstlisting for that
        lines = [u'\\begin{codecell}']
        lines.extend(self.in_env('codeinput',
                                 self.in_env('lstlisting', cell.input)))

        outlines = []
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            outlines.extend(conv_fn(output))

        # And then output of many possible types; use a frame for all of it.
        if outlines:
            lines.extend(self.in_env('codeoutput', outlines))

        lines.append(u'\\end{codecell}')
        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return self.in_env('center',
                [r'\includegraphics[width=6in]{%s}' % img_file, r'\par'])

    def _svg_lines(self, img_file):
        """Convert the SVG file to PDF with inkscape and include the PDF.

        Raises OSError if no inkscape executable was located at import
        time, and subprocess.CalledProcessError if the conversion fails.
        """
        if inkscape is None:
            # Fail with a clear message instead of passing None to
            # subprocess as the program name.
            raise OSError('inkscape was not found; it is required to '
                          'convert SVG figures to PDF')
        base_file = os.path.splitext(img_file)[0]
        pdf_file = base_file + '.pdf'
        subprocess.check_call([inkscape, '--export-pdf=%s' % pdf_file,
                               img_file])
        return self._img_lines(pdf_file)

    @DocInherit
    def render_markdown(self, cell):
        # Delegates the whole conversion to the markdown2latex helper.
        return [markdown2latex(cell.source)]

    @DocInherit
    def render_pyout(self, output):
        lines = []

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            # The latex payload may be one string or a list of lines.
            # Extending with a bare string would add it one character at a
            # time, so the two cases are handled separately.
            latex = output.latex
            if isinstance(latex, list):
                lines.extend(latex)
            else:
                lines.append(latex)

        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.in_env('traceback',
                           self.in_env('verbatim',
                                remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_env('verbatim', cell.source)
        else:
            return [cell.source]

    @DocInherit
    def _unknown_lines(self, data):
        return [r'{\vspace{5mm}\bf WARNING:: unknown cell:}'] + \
               self.in_env('verbatim', data)

    @DocInherit
    def render_display_format_text(self, output):
        lines = []
        if 'text' in output:
            lines.extend(self.in_env('verbatim', output.text.strip()))
        return lines

    @DocInherit
    def render_display_format_html(self, output):
        # latex ignores html
        return []

    @DocInherit
    def render_display_format_latex(self, output):
        # A list of lines is returned as is; anything else is wrapped in a
        # one-element list.
        if isinstance(output.latex, list):
            return output.latex
        return [output.latex]

    @DocInherit
    def render_display_format_json(self, output):
        # latex ignores json
        return []

    @DocInherit
    def render_display_format_javascript(self, output):
        # latex ignores javascript
        return []
Jonathan Taylor
|
class ConverterNotebook(Converter):
    """
    A converter that is essentially a null-op.
    This exists so it can be subclassed
    for custom handlers of .ipynb files
    that create new .ipynb files.

    What distinguishes this from JSONWriter is that
    subclasses can specify what to do with each type of cell.

    Writes out a notebook file.
    """
    extension = 'ipynb'

    def __init__(self, infile, outbase):
        """Set up the converter for `infile`; `outbase` names the output.

        `outbase` is stored on the instance and also used as the notebook
        name written into the metadata by `optional_header`.
        """
        Converter.__init__(self, infile)
        self.outbase = outbase
        # NOTE(review): files_dir is presumably created by
        # Converter.__init__; it is removed here right away -- confirm this
        # converter never needs it.
        rmtree(self.files_dir)

    def convert(self):
        """Return the notebook as JSON text (indent=1, sorted keys).

        The parent's output (cells joined with ',') is parsed and dumped
        again, so the whitespace of the header, footer and cell chunks does
        not affect the result.
        """
        return json.dumps(json.loads(Converter.convert(self, ',')),
                          indent=1, sort_keys=True)

    def optional_header(self):
        """Return the lines that open the notebook JSON up to the cell list."""
        # json.dumps quotes and escapes the name.  Interpolating it raw
        # between double quotes yields invalid JSON as soon as the name
        # holds a quote or a backslash (e.g. a Windows path).
        name_entry = '"name": %s' % json.dumps(self.outbase)
        return ['{',
                '"metadata": {',
                name_entry,
                '},',
                '"nbformat": 3,',
                '"worksheets": [',
                '{',
                '"cells": [']

    def optional_footer(self):
        """Return the lines closing what `optional_header` opened."""
        return [']', '}', ']', '}']

    @DocInherit
    def render_heading(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_code(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_markdown(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_raw(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_pyout(self, output):
        # NOTE(review): cell_to_lines goes through split_lines_cell, which
        # reads `cell_type`; confirm that output nodes carry that attribute.
        return cell_to_lines(output)

    @DocInherit
    def render_pyerr(self, output):
        # NOTE(review): same caveat as render_pyout about `cell_type`.
        return cell_to_lines(output)

    @DocInherit
    def render_display_format_text(self, output):
        return [output.text]

    @DocInherit
    def render_display_format_html(self, output):
        return [output.html]

    @DocInherit
    def render_display_format_latex(self, output):
        return [output.latex]

    @DocInherit
    def render_display_format_json(self, output):
        return [output.json]

    @DocInherit
    def render_display_format_javascript(self, output):
        return [output.javascript]
Paul Ivanov
|
class ConverterPy(Converter):
    """
    A converter that takes a notebook and converts it to a .py file.

    What distinguishes this from PyWriter and PyReader in IPython.nbformat is
    that subclasses can specify what to do with each type of cell.
    Additionally, unlike PyWriter, this does not preserve the '# <markdown>'
    opening and closing comments style comments in favor of a cleaner looking
    python program.

    Note:
        Even though this produces a .py file, it is not guaranteed to be valid
        python file, since the notebook may be using magics and even cell
        magics.
    """
    extension = 'py'

    def __init__(self, infile, show_prompts=True, show_output=True):
        """Set up the converter for `infile`.

        `show_prompts` controls the '# In[n]:' marker lines and
        `show_output` controls whether cell outputs are emitted at all.
        """
        super(ConverterPy, self).__init__(infile)
        self.show_prompts = show_prompts
        self.show_output = show_output

    @staticmethod
    def comment(input):
        "returns every line in input as commented out"
        return "# "+input.replace("\n", "\n# ")

    @DocInherit
    def render_heading(self, cell):
        # One '#' for the comment itself plus one per heading level.
        return ['#{0} {1}'.format('#'*cell.level, cell.source), '']

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []
        lines = []
        if self.show_prompts:
            lines.extend(['# In[%s]:' % cell.prompt_number])
        src = cell.input
        lines.extend([src, ''])
        if self.show_output:
            if cell.outputs:
                lines.extend(['# Out[%s]:' % cell.prompt_number])
            for output in cell.outputs:
                conv_fn = self.dispatch(output.output_type)
                lines.extend(conv_fn(output))
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [self.comment(cell.source), '']

    @DocInherit
    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return [self.comment(indent(cell.source)), '']
        else:
            return [self.comment(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        lines = []
        # output is a dictionary like object with type as a key; only the
        # text representation is emitted, other formats are skipped.
        if 'text' in output:
            lines.extend([self.comment(indent(output.text)), ''])
        lines.append('')
        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        # The traceback is commented out like every other kind of output,
        # so a recorded error does not land in the file as bare text.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return [self.comment(indent(traceback_text)), '']

    @DocInherit
    def _img_lines(self, img_file):
        return [self.comment('image file: %s' % img_file), '']

    @DocInherit
    def render_display_format_text(self, output):
        return [self.comment(indent(output.text))]

    @DocInherit
    def _unknown_lines(self, data):
        return [self.comment('Warning: Unknown cell'+ str(data))]

    @DocInherit
    def render_display_format_html(self, output):
        return [self.comment(output.html)]

    @DocInherit
    def render_display_format_latex(self, output):
        # py output ignores latex
        return []

    @DocInherit
    def render_display_format_json(self, output):
        # py output ignores json
        return []

    @DocInherit
    def render_display_format_javascript(self, output):
        # py output ignores javascript
        return []
Fernando Perez
|
r6677 | #----------------------------------------------------------------------------- | ||
# Standalone conversion functions | ||||
#----------------------------------------------------------------------------- | ||||
Fernando Perez
|
r6671 | |||
Anton I. Sipos
|
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : string
      Path of the rst file to convert.

    Returns
    -------
    string
      Name of the html file written: `infile` with its extension replaced
      by '.html'.

    Raises
    ------
    IOError
      If rst2html writes anything to stderr.
    """
    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a file name
    # containing spaces or shell metacharacters reaches rst2html intact.
    # NOTE(review): without a shell the program must be directly executable
    # as `rst2html`; confirm on platforms that install it as a script.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    # universal_newlines makes the pipes yield text on Python 3 too, so the
    # startswith() checks below compare str with str.
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the end
    # of the body.  This may only work if there's a real title defined so we
    # get a 'div class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
Jonathan Taylor
|
r7366 | #----------------------------------------------------------------------------- | ||
# Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions | ||||
# but at cell level instead of whole notebook level | ||||
#----------------------------------------------------------------------------- | ||||
def writes_cell(cell, **kwargs):
    """Serialize a single cell to a unicode JSON string.

    The encoder class, indent, key sorting and separators are always
    forced, overriding whatever the caller passed for them.  Unless
    `split_lines=False` is given, the lines are split on a deep copy of the
    cell first, so the caller's cell is left untouched by this function.
    Remaining keyword arguments are forwarded to `json.dumps`.
    """
    wants_split = kwargs.pop('split_lines', True)
    kwargs.update(cls=BytesEncoder,
                  indent=3,
                  sort_keys=True,
                  separators=(',', ': '))
    if wants_split:
        cell = split_lines_cell(copy.deepcopy(cell))
    encoded = json.dumps(cell, **kwargs)
    return py3compat.str_to_unicode(encoded, 'utf-8')
Fernando Perez
|
r7871 | |||
Jonathan Taylor
|
# Output fields that may hold multi-line text and are split into lists.
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']


def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    String fields are replaced, in place, by the list of their lines (line
    endings removed); fields that are not strings are left alone.  For code
    cells this covers `input` and the multi-line fields of each output; for
    any other cell type it covers `source` and `rendered`.

    The cell is modified in place and also returned.
    """
    # `basestring` only exists on Python 2; fall back to `str` so the
    # string checks keep working on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if cell.cell_type == 'code':
        if 'input' in cell and isinstance(cell.input, string_types):
            cell.input = (cell.input + '\n').splitlines()
        # A code cell lacking an 'outputs' entry simply has nothing to
        # split, rather than being an error.
        for output in cell.get('outputs', []):
            for key in _multiline_outputs:
                item = output.get(key, None)
                if isinstance(item, string_types):
                    output[key] = (item + '\n').splitlines()
    else:  # text, heading cell
        for key in ['source', 'rendered']:
            item = cell.get(key, None)
            if isinstance(item, string_types):
                cell[key] = (item + '\n').splitlines()
    return cell
Fernando Perez
|
r7871 | |||
Jonathan Taylor
|
def cell_to_lines(cell):
    '''
    Write a cell to json, returning the split lines.

    The cell passed in is not modified: writes_cell splits the lines on a
    deep copy by default, so no in-place split is done here.
    '''
    s = writes_cell(cell).strip()
    return s.split('\n')
Fernando Perez
|
known_formats = "rst (default), html, blogger-html, latex, markdown, py"


def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested `format` in one step.

    The converter class matching `format` is instantiated with `infile` and
    its `render()` method is called.

    Raises SystemExit, listing the known formats, when `format` is not
    recognized.
    """
    # XXX: this is just quick and dirty for now. When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all branch below, as well as in the help
    converters = {
        'rst': ConverterRST,
        'markdown': ConverterMarkdown,
        'html': ConverterHTML,
        'blogger-html': ConverterBloggerHTML,
        'latex': ConverterLaTeX,
        'py': ConverterPy,
    }
    if format not in converters:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)
    converter = converters[format](infile)
    converter.render()
Fernando Perez
|
r6220 | |||
Fernando Perez
|
r6677 | #----------------------------------------------------------------------------- | ||
# Script main | ||||
#----------------------------------------------------------------------------- | ||||
Anton I. Sipos
|
r6261 | |||
Paul Ivanov
|
if __name__ == '__main__':
    # The module docstring doubles as the command-line description; the raw
    # formatter keeps its line breaks in the help output.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)

    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)

    format_help = 'Output format. Supported formats: \n' + known_formats
    parser.add_argument('-f', '--format', default='rst', help=format_help)

    options = parser.parse_args()
    # With nargs=1 the filename arrives as a one-element list.
    main(infile=options.infile[0], format=options.format)