#!/usr/bin/env python
# nb2html.py (r6220, Fernando Perez)
"""A really simple notebook to rst/html exporter.

Usage

  ./nb2html.py file.ipynb

Produces 'file.rst' and 'file.html', along with auto-generated figure files
called nb_figure_NN.png.
"""
import base64
import os
import subprocess
import sys

from IPython.nbformat import current as nbformat
from IPython.utils.text import wrap_paragraphs, indent
# Cell converters
def unknown_cell(cell):
    """Default converter for cells of unknown type.

    Emits a ``.. warning::`` directive whose body is the repr of the cell, so
    that unsupported content is visible in the output instead of being
    silently dropped.

    Returns a flat list of lines.
    """
    # rst_directive takes the directive AND its body text, and already returns
    # a list of lines; return that list directly rather than nesting it.
    return rst_directive('.. warning:: Unknown cell', repr(cell))
def markdown_cell(cell):
    """Convert a markdown cell to rst.

    The cell source is passed through unchanged; no markdown-to-rst
    translation is performed.

    Returns a one-element list holding the cell source.
    """
    source = cell.source
    return [source]
def rst_directive(directive, text):
    """Build the lines of an rst directive.

    The result is the directive line, a blank line, `text` run through
    ``indent`` so it forms the directive body, and a closing blank line.

    Returns a four-element list.
    """
    body = indent(text)
    return [directive, '', body, '']
def code_cell(cell):
    """Convert a code cell to rst.

    Emits an ``In[N]:`` prompt line, the cell input as a ``.. code:: python``
    directive, and then the rst produced for each of the cell's outputs.

    Returns a list of lines; the list is empty when the cell has no input.
    """
    if not cell.input:
        return []

    lines = ['In[%s]:' % cell.prompt_number, '']
    lines.extend(rst_directive('.. code:: python', cell.input))

    for output in cell.outputs:
        conv = converters.get(output.output_type)
        if conv is None:
            # No converter is registered for this output type: emit a visible
            # warning instead of aborting the whole export with a KeyError.
            lines.extend(rst_directive('.. warning:: Unknown output type',
                                       repr(output)))
            continue
        lines.extend(conv(output))

    return lines
# Converters for parts of a cell.
# Number used for the next figure file written by out_display.
figures_counter = 1


def out_display(output):
    """Convert display data from the output of a code cell to rst.

    If the output carries a 'png' entry, it is base64-decoded and saved as
    'nb_figure_N.png' (relative to the current directory), where N is the
    module-level ``figures_counter``; the counter is then incremented and an
    ``.. image::`` directive pointing at the new file is emitted.

    Returns a list of lines; the list is empty when there is no 'png' data.
    """
    global figures_counter
    lines = []
    if 'png' in output:
        fname = 'nb_figure_%s.png' % figures_counter
        # PNG data is binary: open the file in 'wb' so that no newline
        # translation can corrupt the image, and decode with the base64
        # module rather than the Python 2-only str.decode('base64').
        with open(fname, 'wb') as f:
            f.write(base64.b64decode(output.png))

        figures_counter += 1
        lines.append('.. image:: %s' % fname)
        lines.append('')

    return lines
def out_pyout(output):
    """Convert the pyout part of a code cell to rst.

    Emits an ``Out[N]:`` prompt line, followed by a ``.. math::`` directive
    when the output has a 'latex' entry and a ``.. parsed-literal::``
    directive when it has a 'text' entry (in that order).

    Returns a list of lines.
    """
    rst = ['Out[%s]:' % output.prompt_number, '']
    # (key in the output, directive used to render it), in emission order.
    renderers = (
        ('latex', '.. math::'),
        ('text', '.. parsed-literal::'),
    )
    for key, directive in renderers:
        if key in output:
            rst.extend(rst_directive(directive, getattr(output, key)))
    return rst
# Dispatch table mapping a cell type or output type name to the function
# that converts it to a list of rst lines.
converters = {
    'code': code_cell,
    'markdown': markdown_cell,
    'pyout': out_pyout,
    'display_data': out_display,
}
def convert_notebook(nb):
    """Convert a notebook to a single rst string.

    Each cell of the first worksheet is run through the converter registered
    for its cell type (``unknown_cell`` when none is registered), and a blank
    line is added after every cell.  Only the first worksheet is converted.

    Returns the resulting lines joined with newlines.
    """
    rst_lines = []
    first_sheet = nb.worksheets[0]
    for cell in first_sheet.cells:
        to_rst = converters.get(cell.cell_type, unknown_cell)
        rst_lines.extend(to_rst(cell))
        rst_lines.append('')
    return '\n'.join(rst_lines)
def nb2rst(fname):
    """Convert a notebook file to rst.

    Reads the notebook from `fname` as json, converts it, and writes the
    utf8-encoded rst next to the input with the extension replaced by '.rst'.

    Returns the name of the rst file that was written.
    """
    with open(fname) as nb_file:
        notebook = nbformat.read(nb_file, 'json')

    rst_text = convert_notebook(notebook)

    base, _ext = os.path.splitext(fname)
    rst_fname = base + '.rst'
    with open(rst_fname, 'w') as rst_file:
        rst_file.write(rst_text.encode('utf8'))

    return rst_fname
def rst2simplehtml(fname):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The html is written next to the input, with the extension replaced by
    '.html'.

    Returns the name of the html file that was written.

    Raises IOError if rst2html cannot be started or writes anything to its
    stderr.
    """
    # These are the rst2html options that produce the cleanest, simplest html
    # I could find.  This should help in making it easier to paste into the
    # blogspot html window, though I'm still having problems with linebreaks
    # there...
    #
    # The command is passed as an argument list and run without a shell, so a
    # file name containing spaces or shell metacharacters reaches rst2html
    # intact instead of being re-parsed by the shell.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           fname]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as e:
        # Report launch failures as IOError, the exception type this function
        # already uses for rst2html errors, so callers that catch IOError
        # keep working.
        raise IOError('could not run rst2html: %s' % e)

    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith('<div class'):
            break

    newfname = os.path.splitext(fname)[0] + '.html'
    with open(newfname, 'w') as f:
        # `walker` resumes right after the opening '<div class' line; copy
        # everything up to (but not including) the first closing '</div>'.
        for line in walker:
            if line.startswith('</div>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
def main(fname):
    """Convert a notebook to html in one step.

    The notebook is first converted to rst, and the resulting rst file is
    then converted to simplified html.
    """
    rst_fname = nb2rst(fname)
    rst2simplehtml(rst_fname)
if __name__ == '__main__':
    # Exit with a usage message (on stderr, non-zero status) rather than an
    # IndexError traceback when the notebook file name is missing.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s file.ipynb' % sys.argv[0])
    main(sys.argv[1])