##// END OF EJS Templates
Added heading cell converter. Also changed the down-parser for output to html (from looking for <div> and </div> to looking for <body> and </body>).
smithj1 -
Show More
@@ -1,189 +1,198 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A really simple notebook to rst/html exporter.
2 """A really simple notebook to rst/html exporter.
3
3
4 Usage
4 Usage
5
5
6 ./nb2html.py file.ipynb
6 ./nb2html.py file.ipynb
7
7
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 called nb_figure_NN.png.
9 called nb_figure_NN.png.
10
10
11 """
11 """
12
12
13 import os
13 import os
14 import subprocess
14 import subprocess
15 import sys
15 import sys
16
16
17 from IPython.nbformat import current as nbformat
17 from IPython.nbformat import current as nbformat
18 from IPython.utils.text import wrap_paragraphs, indent
18 from IPython.utils.text import wrap_paragraphs, indent
19
19
20
20
21 # Cell converters
21 # Cell converters
22
22
def unknown_cell(cell):
    """Default converter for cells of unknown type.

    Used as the fallback when a cell (or output) type has no entry in
    ``converters``.  Emits an rst ``warning`` directive followed by the
    ``repr`` of the offending cell so nothing is silently dropped.

    Returns list.
    """
    return rst_directive('.. warning:: Unknown cell') + [repr(cell)]
29
29
def heading_cell(cell):
    """Convert a heading cell to rst.

    The title is read from ``cell.source`` and underlined with the
    adornment character associated with ``cell.level`` (1 -> '=',
    2 -> '-', 3 -> '`', 4 -> "'", 5 -> '.', 6 -> '~').

    Returns list.
    """
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
    # A level outside the table falls back to the deepest marker instead of
    # aborting the whole conversion with a KeyError.
    marker = heading_level.get(cell.level, '~')
    # An rst section title must sit on a single line with no leading
    # whitespace, and the underline must be at least as long as the title;
    # collapse any newlines / padding before measuring it.
    title = ' '.join(cell.source.split())
    return ['{0}\n{1}\n'.format(title, marker * len(title))]
37
def markdown_cell(cell):
    """Convert a markdown cell to rst.

    The cell's source is passed through unchanged; no markdown-to-rst
    translation is performed here.

    Returns list.
    """
    return [cell.source]
35
43
36
44
def rst_directive(directive, text=''):
    """Build the lines of an rst directive.

    ``directive`` is the directive line itself (e.g. ``'.. code:: python'``).
    When ``text`` is non-empty it is passed through ``indent`` (imported from
    ``IPython.utils.text``) and appended as the directive body.  A blank
    line follows the directive line and, when present, the body.

    Returns list.
    """
    out = [directive, '']
    if text:
        out.extend([indent(text), ''])
    return out
42
50
def code_cell(cell):
    """Convert a code cell to rst.

    Emits an ``In[N]:`` prompt line, the cell input as a ``code`` directive,
    and then the rst produced for each of the cell's outputs.  Outputs are
    dispatched through ``converters`` on ``output.output_type``; unknown
    output types go to ``unknown_cell``.  A cell with empty input produces
    nothing.

    Returns list.
    """
    if not cell.input:
        return []

    lines = ['In[%s]:' % cell.prompt_number, '']
    lines.extend(rst_directive('.. code:: python', cell.input))

    for output in cell.outputs:
        conv = converters.get(output.output_type, unknown_cell)
        lines.extend(conv(output))

    return lines
59
67
# Converters for parts of a cell.

# Running number used to name the figure files written by out_display.
figures_counter = 1


def out_display(output):
    """Convert display data from the output of a code cell to rst.

    If the output carries a ``png`` entry, its base64 payload is decoded and
    written to ``nb_figure_N.png`` in the current working directory (N is the
    module-level ``figures_counter``, incremented after each figure), and an
    ``image`` directive pointing at that file is emitted.  Outputs without a
    ``png`` entry produce no lines.

    Returns list.
    """
    global figures_counter
    # Local import: base64 is not among the module-level imports.
    import base64

    lines = []

    if 'png' in output:
        fname = 'nb_figure_%s.png' % figures_counter
        # PNG data is binary: open in 'wb' so no newline translation or text
        # encoding is applied, and decode with base64.b64decode rather than
        # the Python-2-only str.decode('base64').
        with open(fname, 'wb') as f:
            f.write(base64.b64decode(output.png))

        figures_counter += 1
        lines.append('.. image:: %s' % fname)
        lines.append('')

    return lines
82
90
83
91
def out_pyout(output):
    """Convert the pyout part of a code cell to rst.

    Emits an ``Out[N]:`` prompt line, then a ``math`` directive if the
    output has a ``latex`` entry and a ``parsed-literal`` directive if it
    has a ``text`` entry (both are emitted when both are present).

    Returns list.
    """
    lines = ['Out[%s]:' % output.prompt_number, '']

    if 'latex' in output:
        lines.extend(rst_directive('.. math::', output.latex))

    if 'text' in output:
        lines.extend(rst_directive('.. parsed-literal::', output.text))

    return lines
98
106
99
107
# Dispatch table: maps a cell type (heading / code / markdown) or an output
# type (pyout / display_data) to the function that renders it as rst lines.
# Lookups that miss fall back to unknown_cell at the call sites.
converters = dict(heading=heading_cell,
                  code=code_cell,
                  markdown=markdown_cell,
                  pyout=out_pyout,
                  display_data=out_display,
                  )
105
114
106
115
107
116
108 def convert_notebook(nb):
def convert_notebook(nb):
    """Convert a notebook object to a single rst string.

    Only the cells of the first worksheet (``nb.worksheets[0]``) are
    converted.  Each cell is dispatched through ``converters`` on its
    ``cell_type`` (falling back to ``unknown_cell``), and a blank line is
    added after every cell.

    Returns the rst lines joined with newlines.
    """
    lines = []
    for cell in nb.worksheets[0].cells:
        conv = converters.get(cell.cell_type, unknown_cell)
        lines.extend(conv(cell))
        lines.append('')

    return '\n'.join(lines)
116
125
117
126
118 def nb2rst(fname):
def nb2rst(fname):
    """Convert notebook to rst.

    Reads the notebook at ``fname`` with ``nbformat.read(f, 'json')``,
    converts it with ``convert_notebook`` and writes the result, encoded as
    UTF-8, next to the input with the extension replaced by ``.rst``.

    Returns the name of the rst file written.
    """
    with open(fname) as f:
        nb = nbformat.read(f, 'json')

    rst = convert_notebook(nb)

    newfname = os.path.splitext(fname)[0] + '.rst'
    # The payload is already-encoded bytes, so the file must be opened in
    # binary mode; text mode would reject bytes or translate newlines.
    with open(newfname, 'wb') as f:
        f.write(rst.encode('utf8'))

    return newfname
131
140
132
141
133 def rst2simplehtml(fname):
def rst2simplehtml(fname):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.  Only the lines between the
    ``<body>`` and ``</body>`` lines of the rst2html output are kept; they
    are written next to the input with the extension replaced by ``.html``.

    Returns the name of the html file written.

    Raises IOError if rst2html writes anything to stderr or exits with a
    non-zero status.
    """
    # Local import: io is not among the module-level imports.
    import io

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is passed as an argument list (no shell) so that file
    # names containing spaces or shell metacharacters are handled literally.
    cmd = ['rst2html.py',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           fname]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr.decode('utf-8', 'replace'))
    if proc.returncode:
        raise IOError('rst2html.py exited with status %s' % proc.returncode)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.  Lines are split on the raw bytes and then
    # decoded one by one (assumes rst2html emits UTF-8, the docutils
    # default -- TODO confirm if a different output encoding is configured).
    walker = (raw.decode('utf-8') for raw in html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing </body> line.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(fname)[0] + '.html'
    with io.open(newfname, 'w', encoding='utf-8') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write(u'\n')

    return newfname
180
189
181
190
182 def main(fname):
def main(fname):
    """Convert a notebook to html in one step.

    Runs ``nb2rst`` on ``fname`` and feeds the resulting rst file to
    ``rst2simplehtml``.
    """
    newfname = nb2rst(fname)
    rst2simplehtml(newfname)
186
195
187
196
188 if __name__ == '__main__':
if __name__ == '__main__':
    # Exactly one argument (the notebook file) is expected; print a usage
    # line instead of failing with a bare IndexError when it is missing.
    if len(sys.argv) != 2:
        sys.exit('Usage: %s file.ipynb' % sys.argv[0])
    main(sys.argv[1])
General Comments 0
You need to be logged in to leave comments. Login now