##// END OF EJS Templates
Added a heading cell converter. Also changed the down-parser for the html output: it now looks for <body> and </body> instead of <div> and </div>.
smithj1 -
Show More
@@ -1,189 +1,198 b''
1 1 #!/usr/bin/env python
2 2 """A really simple notebook to rst/html exporter.
3 3
4 4 Usage
5 5
6 6 ./nb2html.py file.ipynb
7 7
8 8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 9 called nb_figure_NN.png.
10 10
11 11 """
12 12
13 13 import os
14 14 import subprocess
15 15 import sys
16 16
17 17 from IPython.nbformat import current as nbformat
18 18 from IPython.utils.text import wrap_paragraphs, indent
19 19
20 20
21 21 # Cell converters
22 22
def unknown_cell(cell):
    """Fallback converter used for cells that have no dedicated converter.

    Emits an rst warning directive followed by the ``repr`` of the cell.

    Returns list.
    """
    warning = rst_directive('.. warning:: Unknown cell')
    return warning + [repr(cell)]
29 29
def heading_cell(cell):
    """Convert a heading cell to an rst section title.

    The title is underlined with the adornment character assigned to
    ``cell.level`` (1 through 6).  A level outside that range raises
    ``KeyError``.

    Returns list (a single string holding the title and its underline).
    """
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
    marker = heading_level[cell.level]
    # An rst title must be one line immediately followed by an underline of
    # at least the same length, so fold stray newlines and surrounding
    # whitespace in the source into single spaces before measuring it.
    title = ' '.join(cell.source.split())
    return ['{0}\n{1}\n'.format(title, marker * len(title))]
37
def markdown_cell(cell):
    """Convert a markdown cell to rst.

    The markdown source is passed through untouched.

    Returns list.
    """
    source = cell.source
    return [source]
35 43
36 44
def rst_directive(directive, text=''):
    """Build the lines of an rst directive.

    The directive line is always followed by a blank line.  When ``text`` is
    non-empty it is added as the indented directive body, followed by
    another blank line.

    Returns list.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
42 50
def code_cell(cell):
    """Convert a code cell to rst.

    A cell without input produces nothing.  Otherwise the result holds the
    input prompt, the input as a python code directive, and the rst of every
    output as rendered by its registered converter.

    Returns list.
    """
    source = cell.input
    if not source:
        return []

    rst = ['In[%s]:' % cell.prompt_number, '']
    rst += rst_directive('.. code:: python', source)

    # Each output is dispatched on its output_type, like cells are.
    for part in cell.outputs:
        convert = converters.get(part.output_type, unknown_cell)
        rst += convert(part)

    return rst
59 67
# Converters for parts of a cell.

# Number used for the next figure file name; incremented per figure written.
figures_counter = 1


def out_display(output):
    """Convert display data from the output of a code cell to rst.

    If the output carries a base64-encoded ``png`` entry, the decoded image
    is written to ``nb_figure_NN.png`` in the current directory and an
    ``image`` directive pointing at that file is emitted.  Outputs without a
    ``png`` entry produce nothing.

    Returns list.
    """
    import base64

    global figures_counter

    lines = []

    if 'png' in output:
        fname = 'nb_figure_%s.png' % figures_counter
        # PNG data is binary: write it in binary mode so no newline
        # translation can corrupt the image.
        with open(fname, 'wb') as f:
            f.write(base64.b64decode(output.png))

        figures_counter += 1
        lines.append('.. image:: %s' % fname)
        lines.append('')

    return lines
82 90
83 91
def out_pyout(output):
    """Convert the pyout part of a code cell to rst.

    Starts with the output prompt, then adds a math directive for a
    ``latex`` entry and a parsed-literal directive for a ``text`` entry,
    whichever of them are present.

    Returns list.
    """
    rst = ['Out[%s]:' % output.prompt_number, '']

    renderings = (
        ('latex', '.. math::'),
        ('text', '.. parsed-literal::'),
    )
    for key, directive in renderings:
        if key in output:
            rst.extend(rst_directive(directive, getattr(output, key)))

    return rst
98 106
99 107
# Dispatch table: maps a cell type or an output type to the function that
# renders it as a list of rst lines.
converters = {
    'heading': heading_cell,
    'code': code_cell,
    'markdown': markdown_cell,
    'pyout': out_pyout,
    'display_data': out_display,
}
105 114
106 115
107 116
def convert_notebook(nb):
    """Render the cells of the first worksheet of ``nb`` as one rst string.

    Every cell is converted by the converter registered for its cell type
    (``unknown_cell`` when there is none) and followed by an empty line.
    """
    rst = []
    for cell in nb.worksheets[0].cells:
        handler = converters.get(cell.cell_type, unknown_cell)
        rst += handler(cell)
        rst.append('')

    return '\n'.join(rst)
116 125
117 126
def nb2rst(fname):
    """Convert the notebook file ``fname`` to rst.

    The rst is written, utf8-encoded, next to the notebook under the same
    base name with an '.rst' extension.

    Returns the name of the rst file.
    """
    with open(fname) as src:
        notebook = nbformat.read(src, 'json')

    rst = convert_notebook(notebook)

    base, _ext = os.path.splitext(fname)
    newfname = base + '.rst'
    with open(newfname, 'w') as dst:
        dst.write(rst.encode('utf8'))

    return newfname
131 140
132 141
def rst2simplehtml(fname):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The html is written next to ``fname`` under the same base name with an
    '.html' extension.

    Returns the name of the html file.

    Raises IOError if rst2html.py writes anything to stderr, and OSError if
    the rst2html.py executable cannot be started.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is passed as an argument list and run without a shell, so
    # file names containing spaces or shell metacharacters are handed to
    # rst2html.py verbatim instead of being re-parsed by the shell.
    cmd = ['rst2html.py',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           fname]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Skip everything up to and including the opening <body> line; the loop
    # below then copies lines until it reaches the closing </body> line.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(fname)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
180 189
181 190
def main(fname):
    """Convert the notebook ``fname`` to rst and then to html in one go."""
    rst2simplehtml(nb2rst(fname))
186 195
187 196
if __name__ == '__main__':
    # Exit with a usage message rather than an IndexError traceback when the
    # notebook file name is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit('Usage: %s file.ipynb' % sys.argv[0])
    main(sys.argv[1])
General Comments 0
You need to be logged in to leave comments. Login now