##// END OF EJS Templates
use argparse from IPython
Paul Ivanov -
Show More
@@ -1,270 +1,270 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 """A really simple notebook to rst/html exporter.
2 """A really simple notebook to rst/html exporter.
3
3
4 Usage
4 Usage
5
5
6 ./nb2html.py file.ipynb
6 ./nb2html.py file.ipynb
7
7
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
8 Produces 'file.rst' and 'file.html', along with auto-generated figure files
9 called nb_figure_NN.png.
9 called nb_figure_NN.png.
10
10
11 """
11 """
12
12
13 import os
13 import os
14 import subprocess
14 import subprocess
15 import sys
15 import sys
16 import argparse
16 from IPython.external import argparse
17 from IPython.nbformat import current as nbformat
17 from IPython.nbformat import current as nbformat
18 from IPython.utils.text import indent
18 from IPython.utils.text import indent
19
19
20
20
21 # Cell converters
21 # Cell converters
22
22
def unknown_cell(cell):
    """Fallback converter used for cells whose type has no renderer.

    Returns a list holding an rst warning directive followed by the
    ``repr`` of the offending cell.
    """
    lines = rst_directive('.. warning:: Unknown cell')
    lines.append(repr(cell))
    return lines
29
29
30
30
def rst_directive(directive, text=''):
    """Build the list of lines for an rst directive.

    The result always starts with the directive line and a blank line.
    When ``text`` is non-empty it is appended, passed through ``indent``,
    followed by another blank line.
    """
    if not text:
        return [directive, '']
    return [directive, '', indent(text), '']
36
36
37 # Converters for parts of a cell.
37 # Converters for parts of a cell.
38
38
39
39
class ConversionException(Exception):
    """Error raised when a notebook conversion cannot be carried out."""
42
42
43
43
class Converter(object):
    """Base class for notebook converters.

    Subclasses provide ``extension`` and implement the ``render_<type>``
    methods for the cell and output types they support.
    """

    # Encoding used by save() when the caller does not pass one.
    default_encoding = 'utf-8'

    def __init__(self, infile):
        """Remember the notebook path and the directory that contains it."""
        self.infile = infile
        self.dirpath = os.path.dirname(infile)

    @property
    def extension(self):
        """File extension of the converted output; subclasses must define it."""
        raise ConversionException("""extension must be defined in Converter
        subclass""")

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``unknown_cell`` when this object has no
        ``render_<cell_type>`` attribute.
        """
        return getattr(self, 'render_' + cell_type, unknown_cell)

    def convert(self):
        """Render every cell of the first worksheet and return the text.

        Each cell's lines are followed by one empty line; all lines are
        joined with newlines.
        """
        lines = []
        for cell in self.nb.worksheets[0].cells:
            conv_fn = self.dispatch(cell.cell_type)
            lines.extend(conv_fn(cell))
            lines.append('')
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Encode self.output and write it to disk.

        Parameters
        ----------
        infile : str, optional
            Path of the file to write (despite the name, this is the
            output file).  Defaults to the notebook path with its extension
            replaced by ``self.extension``.
        encoding : str, optional
            Encoding applied to ``self.output``; defaults to
            ``self.default_encoding``.

        Returns the path that was written.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already-encoded bytes, so the file has to be opened
        # in binary mode; text mode rejects bytes on Python 3.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    # Renderers below are placeholders that subclasses override.

    def render_heading(self, cell):
        raise NotImplementedError

    def render_code(self, cell):
        raise NotImplementedError

    def render_markdown(self, cell):
        raise NotImplementedError

    def render_pyout(self, cell):
        raise NotImplementedError

    def render_display_data(self, cell):
        raise NotImplementedError

    def render_stream(self, cell):
        raise NotImplementedError
107
107
108
108
class ConverterRST(Converter):
    """Converter that renders a notebook as reStructuredText.

    Every ``render_*`` method returns a list of output lines.
    """

    extension = 'rst'
    # Number of figure files written so far; used to name the next one.
    figures_counter = 0

    def render_heading(self, cell):
        """convert a heading cell to rst

        The heading text is underlined with a marker character chosen by
        ``cell.level`` (1 to 6).

        Returns list."""
        heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
        marker = heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    def render_code(self, cell):
        """Convert a code cell to rst

        Emits the input prompt, the cell input as a ``code`` directive and
        then the rendering of each of the cell's outputs.  A cell without
        input produces no lines.

        Returns list."""

        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    def render_markdown(self, cell):
        """convert a markdown cell to rst

        The cell source is passed through unchanged.

        Returns list."""
        return [cell.source]

    def render_plaintext(self, cell):
        """convert plain text to rst

        The cell source is passed through unchanged.

        Returns list."""
        return [cell.source]

    def render_pyout(self, output):
        """convert pyout part of a code cell to rst

        Returns list."""

        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    def render_display_data(self, output):
        """convert display data from the output of a code cell to rst.

        When the output carries a ``png`` entry, it is base64-decoded and
        written to ``nb_figure_NN.png`` in the notebook's directory, and an
        ``image`` directive pointing at that file is emitted.

        Returns list.
        """
        # Imported here so this block does not depend on the file's import
        # section.
        import base64

        lines = []

        if 'png' in output:
            infile = 'nb_figure_%s.png' % self.figures_counter
            fullname = os.path.join(self.dirpath, infile)
            # PNG data is binary: decode with the base64 module (the
            # 'base64' str codec only exists on Python 2) and write in
            # binary mode so no newline translation corrupts the image.
            with open(fullname, 'wb') as f:
                f.write(base64.b64decode(output.png))

            self.figures_counter += 1
            lines.append('.. image:: %s' % infile)
            lines.append('')

        return lines

    def render_stream(self, output):
        """convert stream part of a code cell to rst

        Returns list."""

        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines
196
196
197
197
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : str
        Path of the rst file to convert.

    Returns the path of the html file written (``infile`` with its
    extension replaced by ``.html``).

    Raises IOError if rst2html cannot be started or writes anything to
    stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste into
    # the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so file names
    # containing spaces or shell metacharacters reach rst2html unchanged.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as err:
        # Without a shell a missing rst2html executable fails here; report
        # it as IOError like every other rst2html failure.
        raise IOError('could not run rst2html: %s' % err)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end
    # of the body.  The pipe output is bytes, so the markers are bytes too.
    for line in walker:
        if line.startswith(b'<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    # Copy the body through byte-for-byte; no decoding is needed.
    with open(newfname, 'wb') as f:
        for line in walker:
            if line.startswith(b'</body>'):
                break
            f.write(line)
            f.write(b'\n')

    return newfname
245
245
246
246
def main(infile, format='rst'):
    """Convert a notebook to rst or html in one step.

    Parameters
    ----------
    infile : str
        Path of the notebook file to convert.
    format : str
        Either 'rst' or 'html'.

    Returns the path of the file produced by the last conversion step.

    Raises ValueError for any other format, instead of silently doing
    nothing.
    """
    if format not in ('rst', 'html'):
        raise ValueError("unsupported format %r; expected 'rst' or 'html'"
                         % (format,))

    # Currently, conversion to html is a 2 step process, nb->rst->html, so
    # the rst file is produced for both formats.
    converter = ConverterRST(infile)
    rstfname = converter.render()
    if format == 'html':
        return rst2simplehtml(rstfname)
    return rstfname
257
257
258
258
if __name__ == '__main__':
    # Command-line entry point: parse the notebook path and target format,
    # then run the conversion.
    parser = argparse.ArgumentParser(description='nbconvert: Convert IPython notebooks to other formats')

    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    #Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    # Restrict the format to the ones main() handles so that a typo is
    # reported by argparse instead of producing no output.
    parser.add_argument('-f', '--format', default='rst',
                        choices=['rst', 'html'],
                        help='output format (default: rst)')
    args = parser.parse_args()
    main(infile=args.infile[0], format=args.format)
General Comments 0
You need to be logged in to leave comments. Login now