##// END OF EJS Templates
use argparse from IPython
Paul Ivanov -
Show More
@@ -1,270 +1,270 b''
1 1 #!/usr/bin/env python
2 2 """A really simple notebook to rst/html exporter.
3 3
4 4 Usage
5 5
6 6 ./nb2html.py file.ipynb
7 7
8 8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 9 called nb_figure_NN.png.
10 10
11 11 """
12 12
13 13 import os
14 14 import subprocess
15 15 import sys
16 import argparse
16 from IPython.external import argparse
17 17 from IPython.nbformat import current as nbformat
18 18 from IPython.utils.text import indent
19 19
20 20
21 21 # Cell converters
22 22
def unknown_cell(cell):
    """Fallback converter for cells whose type has no dedicated renderer.

    Returns a list of rst lines: a warning directive followed by the
    ``repr`` of the cell that could not be converted.
    """
    warning_lines = rst_directive('.. warning:: Unknown cell')
    warning_lines.append(repr(cell))
    return warning_lines
29 29
30 30
def rst_directive(directive, text=''):
    """Return the rst lines for *directive*, optionally followed by a body.

    The result always starts with the directive line and a blank line.
    When *text* is non-empty it is added, indented, followed by another
    blank line.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
36 36
37 37 # Converters for parts of a cell.
38 38
39 39
class ConversionException(Exception):
    """Error raised by converters, e.g. when a subclass omits ``extension``."""
42 42
43 43
class Converter(object):
    """Base class for notebook converters.

    Subclasses set ``extension`` (the output file suffix, without the dot)
    and implement the ``render_<cell_type>`` methods they support.
    """
    default_encoding = 'utf-8'

    def __init__(self, infile):
        """Remember the notebook path and the directory that contains it."""
        self.infile = infile
        self.dirpath = os.path.dirname(infile)

    @property
    def extension(self):
        """Output file extension; subclasses must override this."""
        raise ConversionException("""extension must be defined in Converter
        subclass""")

    def dispatch(self, cell_type):
        """Return the render method for *cell_type*, e.g. ``render_code``.

        Falls back to ``unknown_cell`` when no ``render_<cell_type>``
        attribute exists on the converter.
        """
        return getattr(self, 'render_' + cell_type, unknown_cell)

    def convert(self):
        """Render every cell of the first worksheet and join the lines.

        Each cell's lines are followed by an empty line; the result is a
        single newline-joined string.
        """
        lines = []
        for cell in self.nb.worksheets[0].cells:
            conv_fn = self.dispatch(cell.cell_type)
            lines.extend(conv_fn(cell))
            lines.append('')
        return '\n'.join(lines)

    def render(self):
        """Read, convert, and save ``self.infile``.

        Returns the path of the file written by ``save``.
        """
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        """Read and parse the notebook into a NotebookNode called self.nb."""
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write ``self.output`` to disk and return the path written.

        Parameters
        ----------
        infile : str, optional
            Destination path.  Defaults to the notebook path with its
            extension replaced by ``self.extension``.
        encoding : str, optional
            Encoding applied to ``self.output``.  Defaults to
            ``self.default_encoding``.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded to bytes, so the file has to be
        # opened in binary mode; text mode would re-translate newlines on
        # Windows and rejects bytes outright on Python 3.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    # The render_* methods below are the hooks concrete converters override.

    def render_heading(self, cell):
        raise NotImplementedError

    def render_code(self, cell):
        raise NotImplementedError

    def render_markdown(self, cell):
        raise NotImplementedError

    def render_pyout(self, cell):
        raise NotImplementedError

    def render_display_data(self, cell):
        raise NotImplementedError

    def render_stream(self, cell):
        raise NotImplementedError
107 107
108 108
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText."""
    extension = 'rst'
    # Index used to name the next extracted figure file (nb_figure_NN.png).
    figures_counter = 0

    def render_heading(self, cell):
        """convert a heading cell to rst

        The heading text is underlined with a marker character chosen by
        ``cell.level`` (1-6).

        Returns list."""
        heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
        marker = heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    def render_code(self, cell):
        """Convert a code cell to rst

        Emits the input prompt, the source as a ``code`` directive, then
        each output rendered by the method matching its ``output_type``.
        Cells with empty input produce no lines.

        Returns list."""

        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    def render_markdown(self, cell):
        """convert a markdown cell to rst

        The source is passed through unchanged.

        Returns list."""
        return [cell.source]

    def render_plaintext(self, cell):
        """convert plain text to rst

        The source is passed through unchanged.

        Returns list."""
        return [cell.source]

    def render_pyout(self, output):
        """convert pyout part of a code cell to rst

        Emits the output prompt, then a ``math`` directive for a latex
        representation and a ``parsed-literal`` directive for a text
        representation, whichever are present.

        Returns list."""

        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    def render_display_data(self, output):
        """convert display data from the output of a code cell to rst.

        A png payload is base64-decoded and written next to the notebook
        as ``nb_figure_<counter>.png``; an ``image`` directive pointing at
        that file is returned.  Other payload types produce no lines.

        Returns list.
        """
        # Local import so this method does not depend on the module's
        # import block.
        import base64

        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: text mode would corrupt it on platforms
            # that translate newlines, so open the file with 'wb'.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    def render_stream(self, output):
        """convert stream part of a code cell to rst

        The stream text, when present, becomes a ``parsed-literal``
        directive.

        Returns list."""

        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
196 196
197 197
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The html is written next to *infile* with a ``.html`` extension and that
    path is returned.  Raises IOError with rst2html's stderr text if the
    command wrote anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a filename
    # containing spaces or shell metacharacters is passed through verbatim
    # instead of being split or interpreted.
    cmd = ['rst2html',
           '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments',
           infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Skip everything up to and including the opening <body> line.
    # This may only work if there's a real title defined so we get a
    # 'div class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    # Copy the remaining lines until the closing </body> line.
    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
245 245
246 246
def main(infile, format='rst'):
    """Convert the notebook *infile* to the requested output format.

    Parameters
    ----------
    infile : str
        Path of the notebook to convert.
    format : str
        ``'rst'`` (default) writes a rst file; ``'html'`` writes the rst
        file and then converts it to html.

    Raises
    ------
    ValueError
        If *format* is not one of the supported formats.  Previously an
        unknown format was silently ignored and nothing was written.
    """
    if format == 'rst':
        converter = ConverterRST(infile)
        converter.render()
    elif format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        converter = ConverterRST(infile)
        rstfname = converter.render()
        rst2simplehtml(rstfname)
    else:
        raise ValueError("unsupported format %r (expected 'rst' or 'html')"
                         % (format,))
257 257
258 258
if __name__ == '__main__':
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams, e.g. with an
    # optional positional of type argparse.FileType('r') defaulting to
    # sys.stdin.
    cli = argparse.ArgumentParser(
        description='nbconvert: Convert IPython notebooks to other formats')

    # The notebook filename is a required positional argument.
    cli.add_argument('infile', nargs=1)
    cli.add_argument('-f', '--format', default='rst')
    options = cli.parse_args()

    # nargs=1 yields a one-element list, so take its first item.
    notebook_path = options.infile[0]
    main(infile=notebook_path, format=options.format)
General Comments 0
You need to be logged in to leave comments. Login now