nbconvert.py
270 lines
| 7.7 KiB
| text/x-python
|
PythonLexer
|
r6220 | #!/usr/bin/env python | ||
"""A really simple notebook to rst/html exporter. | ||||
Usage | ||||
./nbconvert.py [-f FORMAT] file.ipynb | ||||
Produces 'file.rst' (and also 'file.html' when FORMAT is 'html'), along with | ||||
auto-generated figure files called nb_figure_NN.png. | ||||
""" | ||||
import os | ||||
import subprocess | ||||
import sys | ||||
|
r6261 | import argparse | ||
|
r6220 | from IPython.nbformat import current as nbformat | ||
|
r6257 | from IPython.utils.text import indent | ||
|
r6220 | |||
# Cell converters | ||||
def unknown_cell(cell):
    """Fallback converter for cells whose type has no dedicated renderer.

    Returns a list of rst lines: a ``warning`` directive announcing the
    unknown cell, followed by the ``repr`` of the cell itself.
    """
    warning_lines = rst_directive('.. warning:: Unknown cell')
    cell_dump = [repr(cell)]
    return warning_lines + cell_dump
|
r6220 | |||
|
def rst_directive(directive, text=''):
    """Build the list of lines that make up an rst directive.

    The result always starts with the ``directive`` line followed by a blank
    line.  When ``text`` is non-empty, it is passed through ``indent`` and
    appended, followed by one more blank line.

    Returns list.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
|
r6220 | |||
# Converters for parts of a cell. | ||||
|
r6253 | |||
|
r6261 | |||
|
class ConversionException(Exception):
    """Exception type used by the converters to signal a conversion problem."""
|
r6253 | |||
|
r6239 | class Converter(object): | ||
default_encoding = 'utf-8' | ||||
|
r6253 | |||
|
r6261 | def __init__(self, infile): | ||
self.infile = infile | ||||
self.dirpath = os.path.dirname(infile) | ||||
|
r6239 | |||
@property | ||||
def extension(self): | ||||
raise ConversionException("""extension must be defined in Converter | ||||
subclass""") | ||||
|
r6253 | def dispatch(self, cell_type): | ||
|
r6239 | """return cell_type dependent render method, for example render_code | ||
""" | ||||
|
r6253 | return getattr(self, 'render_' + cell_type, unknown_cell) | ||
|
r6239 | |||
def convert(self): | ||||
lines = [] | ||||
for cell in self.nb.worksheets[0].cells: | ||||
conv_fn = self.dispatch(cell.cell_type) | ||||
lines.extend(conv_fn(cell)) | ||||
lines.append('') | ||||
return '\n'.join(lines) | ||||
def render(self): | ||||
|
r6261 | "read, convert, and save self.infile" | ||
|
r6239 | self.read() | ||
self.output = self.convert() | ||||
return self.save() | ||||
def read(self): | ||||
"read and parse notebook into NotebookNode called self.nb" | ||||
|
r6261 | with open(self.infile) as f: | ||
|
r6239 | self.nb = nbformat.read(f, 'json') | ||
|
r6261 | def save(self, infile=None, encoding=None): | ||
|
r6239 | "read and parse notebook into self.nb" | ||
|
r6261 | if infile is None: | ||
infile = os.path.splitext(self.infile)[0] + '.' + self.extension | ||||
|
r6239 | if encoding is None: | ||
encoding = self.default_encoding | ||||
|
r6261 | with open(infile, 'w') as f: | ||
|
r6239 | f.write(self.output.encode(encoding)) | ||
|
r6261 | return infile | ||
|
r6220 | |||
|
r6253 | def render_heading(self, cell): | ||
raise NotImplementedError | ||||
def render_code(self, cell): | ||||
raise NotImplementedError | ||||
|
r6249 | |||
|
r6253 | def render_markdown(self, cell): | ||
raise NotImplementedError | ||||
|
r6249 | |||
|
r6253 | def render_pyout(self, cell): | ||
raise NotImplementedError | ||||
|
r6249 | |||
|
r6253 | def render_display_data(self, cell): | ||
raise NotImplementedError | ||||
|
r6249 | |||
|
r6253 | def render_stream(self, cell): | ||
raise NotImplementedError | ||||
|
r6220 | |||
|
r6249 | |||
|
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText.

    Every ``render_*`` method returns a list of rst lines.  PNG outputs are
    written as separate ``nb_figure_<N>.png`` files in the directory of the
    input notebook and referenced through ``image`` directives.
    """
    extension = 'rst'
    # Number of figure files written so far; it is the <N> in the figure
    # file names.
    figures_counter = 0

    def render_heading(self, cell):
        """convert a heading cell to rst

        The heading text is underlined with a character that depends on
        ``cell.level`` (levels 1 to 6 are known).

        Returns list."""
        heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
        marker = heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    def render_code(self, cell):
        """Convert a code cell to rst

        Emits the input prompt, the input as a ``code`` directive, and then
        the rendering of every output of the cell.  A cell without input
        produces no lines at all.

        Returns list."""
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    def render_markdown(self, cell):
        """convert a markdown cell to rst

        The markdown source is passed through unchanged.

        Returns list."""
        return [cell.source]

    def render_plaintext(self, cell):
        """convert plain text to rst

        Returns list."""
        return [cell.source]

    def render_pyout(self, output):
        """convert pyout part of a code cell to rst

        Returns list."""
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    def render_display_data(self, output):
        """convert display data from the output of a code cell to rst.

        Only the ``png`` representation is handled: the base64 payload is
        decoded and written to a new figure file, and an ``image``
        directive pointing at that file is emitted.

        Returns list.
        """
        lines = []
        if 'png' in output:
            # Imported here because this is the only place that needs it.
            import base64

            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: text mode would corrupt it on Windows and
            # refuses bytes on Python 3.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            # The directive uses the bare file name, i.e. a path relative
            # to the directory the figure was written to.
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    def render_stream(self, output):
        """convert stream part of a code cell to rst

        Returns list."""
        lines = []
        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
|
r6220 | |||
|
r6253 | |||
|
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    ``infile`` is the path of the rst file.  The html is written next to it,
    with the suffix replaced by ``.html``.

    Returns the path of the html file.

    Raises IOError when rst2html cannot be started, writes anything to
    stderr, or exits with a non-zero status.
    """
    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so file names
    # containing spaces or shell metacharacters are passed through verbatim.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as err:
        # Keep reporting failures as IOError, also on Python versions where
        # OSError and IOError are distinct types.
        raise IOError('could not run rst2html: %s' % err)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)
    if proc.returncode:
        raise IOError('rst2html exited with status %d' % proc.returncode)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.  The process output is bytes, so every
    # comparison and write below uses bytes as well.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then write until we find the
    # closing body tag.
    for line in walker:
        if line.startswith(b'<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'wb') as f:
        for line in walker:
            if line.startswith(b'</body>'):
                break
            f.write(line)
            f.write(b'\n')

    return newfname
|
def main(infile, format='rst'):
    """Convert the notebook ``infile`` to ``format`` in one step.

    Supported formats are ``'rst'`` and ``'html'``.

    Returns the path of the file that was written last: the rst file for
    ``'rst'``, the html file for ``'html'``.

    Raises ValueError for any other format, before anything is read or
    written.
    """
    if format not in ('rst', 'html'):
        raise ValueError('unsupported output format: %r' % (format,))

    # Both formats start from rst; currently, conversion to html is a
    # 2 step process, nb->rst->html.
    converter = ConverterRST(infile)
    rstfname = converter.render()
    if format == 'html':
        return rst2simplehtml(rstfname)
    return rstfname
|
r6220 | |||
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='nbconvert: Convert IPython notebooks to other formats')
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    # Restrict the format to the ones main() handles, so that a typo is
    # reported by the argument parser instead of being accepted.
    parser.add_argument('-f', '--format', default='rst',
                        choices=['rst', 'html'],
                        help='output format (default: rst)')
    args = parser.parse_args()
    # nargs=1 yields a one-element list, hence the [0].
    main(infile=args.infile[0], format=args.format)