##// END OF EJS Templates
improve rst_directive and fix bug in unknown_cell
Fernando Perez -
Show More
@@ -1,186 +1,189 b''
1 1 #!/usr/bin/env python
2 2 """A really simple notebook to rst/html exporter.
3 3
4 4 Usage
5 5
6 6 ./nb2html.py file.ipynb
7 7
8 8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 9 called nb_figure_NN.png.
10 10
11 11 """
12 12
13 13 import os
14 14 import subprocess
15 15 import sys
16 16
17 17 from IPython.nbformat import current as nbformat
18 18 from IPython.utils.text import wrap_paragraphs, indent
19 19
20 20
21 21 # Cell converters
22 22
def unknown_cell(cell):
    """Default converter for cells of unknown type.

    Emits an rst warning directive followed by the repr of the cell.

    Returns list.
    """
    # rst_directive already returns a list of lines; concatenate instead of
    # nesting it, because callers extend() their output with this result.
    return rst_directive('.. warning:: Unknown cell') + [repr(cell)]
29 29
def markdown_cell(cell):
    """Convert a markdown cell to rst.

    The cell source is passed through unchanged, as the only item of the
    returned list.

    Returns list."""
    source = cell.source
    return [source]
35 35
36 36
def rst_directive(directive, text=''):
    """Build the lines of an rst directive.

    `directive` is the complete directive line (e.g. '.. code:: python').
    `text` is the optional directive body; when it is empty only the
    directive line and a blank line are produced.

    Returns list.
    """
    out = [directive, '']
    if text:
        # The body is indented so that it belongs to the directive, and is
        # followed by a blank line to terminate it.
        out.extend([indent(text), ''])
    return out
39 42
def code_cell(cell):
    """Convert a code cell to rst

    Cells without input produce no output at all. Otherwise the input is
    rendered as a python code directive under an 'In[N]:' prompt, followed
    by the conversion of each of the cell's outputs.

    Returns list."""

    if not cell.input:
        return []

    lines = ['In[%s]:' % cell.prompt_number, '']
    lines.extend(rst_directive('.. code:: python', cell.input))

    for output in cell.outputs:
        # Fall back to unknown_cell so an unsupported output type is reported
        # in the document instead of raising KeyError.
        conv = converters.get(output.output_type, unknown_cell)
        lines.extend(conv(output))

    return lines
56 59
# Converters for parts of a cell.
figures_counter = 1

def out_display(output):
    """Convert display data from the output of a code cell to rst.

    When the output has a 'png' entry, it is base64-decoded and written to
    'nb_figure_NN.png' in the current directory, where NN is the module-level
    figures_counter (incremented after each figure), and an image directive
    pointing at that file is emitted.

    Returns list.
    """
    global figures_counter

    # Local import: this function is the only user of base64.
    import base64

    lines = []

    if 'png' in output:
        fname = 'nb_figure_%s.png' % figures_counter
        # PNG data is binary, so the file must be opened in binary mode.
        with open(fname, 'wb') as f:
            f.write(base64.b64decode(output.png))

        figures_counter += 1
        lines.append('.. image:: %s' % fname)
        lines.append('')

    return lines
79 82
80 83
def out_pyout(output):
    """Render the pyout part of a code cell as rst.

    Starts with an 'Out[N]:' prompt, then adds a math directive for a
    'latex' entry and a parsed-literal directive for a 'text' entry, for
    whichever of the two the output contains.

    Returns list."""
    rendered = ['Out[%s]:' % output.prompt_number, '']

    # (entry name, directive used to render it), in output order.
    renderings = (('latex', '.. math::'),
                  ('text', '.. parsed-literal::'))
    for key, directive in renderings:
        if key in output:
            rendered.extend(rst_directive(directive, getattr(output, key)))

    return rendered
95 98
96 99
# Dispatch table mapping a cell type or output type name to its converter.
converters = {
    'code': code_cell,
    'markdown': markdown_cell,
    'pyout': out_pyout,
    'display_data': out_display,
}
102 105
103 106
104 107
def convert_notebook(nb):
    """Convert a notebook object to rst.

    Only the cells of the first worksheet are converted; each cell's lines
    are followed by a blank line. A notebook without worksheets converts to
    the empty string.

    Returns the rst document as a single string.
    """
    if not nb.worksheets:
        return ''

    lines = []
    for cell in nb.worksheets[0].cells:
        # Cell types without a registered converter are reported in the
        # output through unknown_cell.
        conv = converters.get(cell.cell_type, unknown_cell)
        lines.extend(conv(cell))
        lines.append('')

    return '\n'.join(lines)
113 116
114 117
def nb2rst(fname):
    """Convert notebook to rst

    Reads the json notebook `fname` and writes its rst conversion, utf8
    encoded, to a file with the same name but a '.rst' extension.

    Returns the name of the rst file.
    """

    with open(fname) as f:
        nb = nbformat.read(f, 'json')

    rst = convert_notebook(nb)

    newfname = os.path.splitext(fname)[0] + '.rst'
    # The text is encoded explicitly, so the file is opened in binary mode.
    with open(newfname, 'wb') as f:
        f.write(rst.encode('utf8'))

    return newfname
128 131
129 132
def rst2simplehtml(fname):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The html is written to a file with the same name as `fname` but a '.html'
    extension.

    Returns the name of the html file.

    Raises IOError if rst2html writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest html
    # I could find.  This should help in making it easier to paste into the
    # blogspot html window, though I'm still having problems with linebreaks
    # there...
    #
    # The command is an argument list run without a shell, so file names
    # containing spaces or shell metacharacters are passed through verbatim.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           fname]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr.decode('utf8', 'replace'))

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.  The output is handled as bytes throughout.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith(b'<div class'):
            break

    newfname = os.path.splitext(fname)[0] + '.html'
    with open(newfname, 'wb') as f:
        for line in walker:
            if line.startswith(b'</div>'):
                break
            f.write(line)
            f.write(b'\n')

    return newfname
177 180
178 181
def main(fname):
    """Convert the notebook `fname` to html in one step.

    The notebook is first converted to rst, and the resulting rst file is
    then converted to html."""
    rst2simplehtml(nb2rst(fname))
183 186
184 187
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # No notebook given: print the usage from the module docstring to
        # stderr and exit with a non-zero status.
        sys.exit(__doc__)
    main(sys.argv[1])
General Comments 0
You need to be logged in to leave comments. Login now