upstream/ipython Commit - r6222:72505349

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

"""A really simple notebook to rst/html exporter.

2

"""A really simple notebook to rst/html exporter.

3

4

Usage

4

Usage

5

6

./nb2html.py file.ipynb

6

./nb2html.py file.ipynb

7

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

9

called nb_figure_NN.png.

9

called nb_figure_NN.png.

10

11

"""

11

"""

12

13

import os

13

import os

14

import subprocess

14

import subprocess

15

import sys

15

import sys

16

17

from IPython.nbformat import current as nbformat

17

from IPython.nbformat import current as nbformat

18

from IPython.utils.text import wrap_paragraphs, indent

18

from IPython.utils.text import wrap_paragraphs, indent

19

20

21

# Cell converters

21

# Cell converters

22

23

def unknown_cell(cell):

23

def unknown_cell(cell):

24

"""Default converter for cells of unknown type.

24

"""Default converter for cells of unknown type.

25

"""

25

"""

26

27

return [rst_directive('.. warning:: Unknown cell'),

27

return rst_directive('.. warning:: Unknown cell') + \

28

repr(cell)]

28

[repr(cell)]

29

30

def markdown_cell(cell):

30

def markdown_cell(cell):

31

"""convert a markdown cell to rst

31

"""convert a markdown cell to rst

32

33

Returns list."""

33

Returns list."""

34

return [cell.source]

34

return [cell.source]

35

36

37

def rst_directive(directive, text):

37

def rst_directive(directive, text=''):

38

return [directive, '', indent(text), '']

38

out = [directive, '']

39

if text:

40

out.extend([indent(text), ''])

41

return out

39

42

40

def code_cell(cell):

43

def code_cell(cell):

41

"""Convert a code cell to rst

44

"""Convert a code cell to rst

42

45

43

Returns list."""

46

Returns list."""

44

47

45

if not cell.input:

48

if not cell.input:

46

return []

49

return []

47

50

48

lines = ['In[%s]:' % cell.prompt_number, '']

51

lines = ['In[%s]:' % cell.prompt_number, '']

49

lines.extend(rst_directive('.. code:: python', cell.input))

52

lines.extend(rst_directive('.. code:: python', cell.input))

50

53

51

for output in cell.outputs:

54

for output in cell.outputs:

52

conv = converters[output.output_type]

55

conv = converters.get(output.output_type, unknown_cell)

53

lines.extend(conv(output))

56

lines.extend(conv(output))

54

57

55

return lines

58

return lines

56

59

57

# Converters for parts of a cell.

60

# Converters for parts of a cell.

58

figures_counter = 1

61

figures_counter = 1

59

62

60

def out_display(output):

63

def out_display(output):

61

"""convert display data from the output of a code cell to rst.

64

"""convert display data from the output of a code cell to rst.

62

65

63

Returns list.

66

Returns list.

64

"""

67

"""

65

global figures_counter

68

global figures_counter

66

69

67

lines = []

70

lines = []

68

71

69

if 'png' in output:

72

if 'png' in output:

70

fname = 'nb_figure_%s.png' % figures_counter

73

fname = 'nb_figure_%s.png' % figures_counter

71

with open(fname, 'w') as f:

74

with open(fname, 'w') as f:

72

f.write(output.png.decode('base64'))

75

f.write(output.png.decode('base64'))

73

76

74

figures_counter += 1

77

figures_counter += 1

75

lines.append('.. image:: %s' % fname)

78

lines.append('.. image:: %s' % fname)

76

lines.append('')

79

lines.append('')

77

80

78

return lines

81

return lines

79

82

80

83

81

def out_pyout(output):

84

def out_pyout(output):

82

"""convert pyout part of a code cell to rst

85

"""convert pyout part of a code cell to rst

83

86

84

Returns list."""

87

Returns list."""

85

88

86

lines = ['Out[%s]:' % output.prompt_number, '']

89

lines = ['Out[%s]:' % output.prompt_number, '']

87

90

88

if 'latex' in output:

91

if 'latex' in output:

89

lines.extend(rst_directive('.. math::', output.latex))

92

lines.extend(rst_directive('.. math::', output.latex))

90

93

91

if 'text' in output:

94

if 'text' in output:

92

lines.extend(rst_directive('.. parsed-literal::', output.text))

95

lines.extend(rst_directive('.. parsed-literal::', output.text))

93

96

94

return lines

97

return lines

95

98

96

99

97

converters = dict(code = code_cell,

100

converters = dict(code = code_cell,

98

markdown = markdown_cell,

101

markdown = markdown_cell,

99

pyout = out_pyout,

102

pyout = out_pyout,

100

display_data = out_display,

103

display_data = out_display,

101

)

104

)

102

105

103

106

104

107

105

def convert_notebook(nb):

108

def convert_notebook(nb):

106

lines = []

109

lines = []

107

for cell in nb.worksheets[0].cells:

110

for cell in nb.worksheets[0].cells:

108

conv = converters.get(cell.cell_type, unknown_cell)

111

conv = converters.get(cell.cell_type, unknown_cell)

109

lines.extend(conv(cell))

112

lines.extend(conv(cell))

110

lines.append('')

113

lines.append('')

111

114

112

return '\n'.join(lines)

115

return '\n'.join(lines)

113

116

114

117

115

def nb2rst(fname):

118

def nb2rst(fname):

116

"Convert notebook to rst"

119

"Convert notebook to rst"

117

120

118

with open(fname) as f:

121

with open(fname) as f:

119

nb = nbformat.read(f, 'json')

122

nb = nbformat.read(f, 'json')

120

123

121

rst = convert_notebook(nb)

124

rst = convert_notebook(nb)

122

125

123

newfname = os.path.splitext(fname)[0] + '.rst'

126

newfname = os.path.splitext(fname)[0] + '.rst'

124

with open(newfname, 'w') as f:

127

with open(newfname, 'w') as f:

125

f.write(rst.encode('utf8'))

128

f.write(rst.encode('utf8'))

126

129

127

return newfname

130

return newfname

128

131

129

132

130

def rst2simplehtml(fname):

133

def rst2simplehtml(fname):

131

"""Convert a rst file to simplified html suitable for blogger.

134

"""Convert a rst file to simplified html suitable for blogger.

132

135

133

This just runs rst2html with certain parameters to produce really simple

136

This just runs rst2html with certain parameters to produce really simple

134

html and strips the document header, so the resulting file can be easily

137

html and strips the document header, so the resulting file can be easily

135

pasted into a blogger edit window.

138

pasted into a blogger edit window.

136

"""

139

"""

137

140

138

# This is the template for the rst2html call that produces the cleanest,

141

# This is the template for the rst2html call that produces the cleanest,

139

# simplest html I could find. This should help in making it easier to

142

# simplest html I could find. This should help in making it easier to

140

# paste into the blogspot html window, though I'm still having problems

143

# paste into the blogspot html window, though I'm still having problems

141

# with linebreaks there...

144

# with linebreaks there...

142

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

145

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

143

"--no-generator --no-datestamp --no-source-link "

146

"--no-generator --no-datestamp --no-source-link "

144

"--no-toc-backlinks --no-section-numbering "

147

"--no-toc-backlinks --no-section-numbering "

145

"--strip-comments ")

148

"--strip-comments ")

146

149

147

cmd = "%s %s" % (cmd_template, fname)

150

cmd = "%s %s" % (cmd_template, fname)

148

proc = subprocess.Popen(cmd,

151

proc = subprocess.Popen(cmd,

149

stdout=subprocess.PIPE,

152

stdout=subprocess.PIPE,

150

stderr=subprocess.PIPE,

153

stderr=subprocess.PIPE,

151

shell=True)

154

shell=True)

152

html, stderr = proc.communicate()

155

html, stderr = proc.communicate()

153

if stderr:

156

if stderr:

154

raise IOError(stderr)

157

raise IOError(stderr)

155

158

156

# Make an iterator so breaking out holds state. Our implementation of

159

# Make an iterator so breaking out holds state. Our implementation of

157

# searching for the html body below is basically a trivial little state

160

# searching for the html body below is basically a trivial little state

158

# machine, so we need this.

161

# machine, so we need this.

159

walker = iter(html.splitlines())

162

walker = iter(html.splitlines())

160

163

161

# Find start of main text, break out to then print until we find end /div.

164

# Find start of main text, break out to then print until we find end /div.

162

# This may only work if there's a real title defined so we get a 'div class'

165

# This may only work if there's a real title defined so we get a 'div class'

163

# tag, I haven't really tried.

166

# tag, I haven't really tried.

164

for line in walker:

167

for line in walker:

165

if line.startswith('<div class'):

168

if line.startswith('<div class'):

166

break

169

break

167

170

168

newfname = os.path.splitext(fname)[0] + '.html'

171

newfname = os.path.splitext(fname)[0] + '.html'

169

with open(newfname, 'w') as f:

172

with open(newfname, 'w') as f:

170

for line in walker:

173

for line in walker:

171

if line.startswith('</div>'):

174

if line.startswith('</div>'):

172

break

175

break

173

f.write(line)

176

f.write(line)

174

f.write('\n')

177

f.write('\n')

175

178

176

return newfname

179

return newfname

177

180

178

181

179

def main(fname):

182

def main(fname):

180

"""Convert a notebook to html in one step"""

183

"""Convert a notebook to html in one step"""

181

newfname = nb2rst(fname)

184

newfname = nb2rst(fname)

182

rst2simplehtml(newfname)

185

rst2simplehtml(newfname)

183

186

184

187

185

if __name__ == '__main__':

188

if __name__ == '__main__':

186

main(sys.argv[1])

189

main(sys.argv[1])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """A really simple notebook to rst/html exporter.
             Usage
               ./nb2html.py file.ipynb
             Produces 'file.rst' and 'file.html', along with auto-generated figure files
             called nb_figure_NN.png.
             """
             import os
             import subprocess
             import sys
             from IPython.nbformat import current as nbformat
             from IPython.utils.text import wrap_paragraphs, indent
             # Cell converters
             def unknown_cell(cell):
                 """Default converter for cells of unknown type.
                 """
-                return [rst_directive('.. warning:: Unknown cell'),
+                return rst_directive('.. warning:: Unknown cell') + \
-                        repr(cell)]
+                  [repr(cell)]
             def markdown_cell(cell):
                 """Render a markdown cell as rst lines.

                 The cell's source text is passed through unchanged, wrapped in a
                 one-element list.

                 Returns list."""
                 source_text = cell.source
                 return [source_text]
-            def rst_directive(directive, text):
+            def rst_directive(directive, text=''):
-                return [directive, '', indent(text), '']
+                out = [directive, '']
+                if text:
+                    out.extend([indent(text), ''])
+                return out
             def code_cell(cell):
                 """Convert a code cell to rst
                 Returns list."""
                 if not cell.input:
                     return []
                 lines = ['In[%s]:' % cell.prompt_number, '']
                 lines.extend(rst_directive('.. code:: python', cell.input))
                 for output in cell.outputs:
-                    conv = converters[output.output_type]
+                    conv = converters.get(output.output_type, unknown_cell)
                     lines.extend(conv(output))
                 return lines
             # Converters for parts of a cell.
             figures_counter = 1
             def out_display(output):
                 """Convert display data from the output of a code cell to rst.

                 If the output carries a 'png' entry, its base64 payload is decoded and
                 written to 'nb_figure_<N>.png' in the current directory, where N is
                 the module-level ``figures_counter`` (incremented after each figure),
                 and an ``.. image::`` directive pointing at that file is emitted.

                 Returns list of rst lines (empty when there is no png data).
                 """
                 global figures_counter
                 # Local import: base64.b64decode replaces the Python 2-only
                 # str.decode('base64') codec the original relied on.
                 import base64
                 lines = []
                 if 'png' in output:
                     fname = 'nb_figure_%s.png' % figures_counter
                     # PNG data is binary; 'wb' keeps newline translation from
                     # corrupting the image where text and binary modes differ.
                     with open(fname, 'wb') as f:
                         f.write(base64.b64decode(output.png))
                     figures_counter += 1
                     lines.extend(['.. image:: %s' % fname, ''])
                 return lines
             def out_pyout(output):
                 """Turn the pyout part of a code cell into rst lines.

                 Starts with an 'Out[N]:' prompt line followed by a blank line, then
                 adds a math directive for a 'latex' entry and a parsed-literal
                 directive for a 'text' entry, in that order, when present.

                 Returns list."""
                 rst = ['Out[%s]:' % output.prompt_number, '']
                 for key, directive in (('latex', '.. math::'),
                                        ('text', '.. parsed-literal::')):
                     if key in output:
                         rst.extend(rst_directive(directive, getattr(output, key)))
                 return rst
             # Dispatch table: cell_type / output_type name -> converter function.
             converters = {
                 'code': code_cell,
                 'markdown': markdown_cell,
                 'pyout': out_pyout,
                 'display_data': out_display,
             }
             def convert_notebook(nb):
                 """Convert the cells of a notebook's first worksheet to one rst string.

                 Each cell is dispatched on its ``cell_type`` through ``converters``
                 (falling back to ``unknown_cell``); an empty line is appended after
                 every cell and all lines are joined with newlines.
                 """
                 rst_lines = []
                 first_sheet = nb.worksheets[0]
                 for nb_cell in first_sheet.cells:
                     handler = converters.get(nb_cell.cell_type, unknown_cell)
                     rst_lines += handler(nb_cell)
                     rst_lines.append('')
                 return '\n'.join(rst_lines)
             def nb2rst(fname):
                 """Convert notebook to rst.

                 Reads the notebook file ``fname`` as JSON, converts it with
                 ``convert_notebook`` and writes the UTF-8 encoded result next to the
                 input, with the extension replaced by '.rst'.

                 Returns the name of the rst file written.
                 """
                 with open(fname) as f:
                     nb = nbformat.read(f, 'json')
                 rst = convert_notebook(nb)
                 newfname = os.path.splitext(fname)[0] + '.rst'
                 # The payload is already encoded, so the file is opened in binary
                 # mode; a text-mode handle rejects bytes on Python 3.
                 with open(newfname, 'wb') as f:
                     f.write(rst.encode('utf8'))
                 return newfname
             def rst2simplehtml(fname):
                 """Convert a rst file to simplified html suitable for blogger.

                 This just runs rst2html with certain parameters to produce really simple
                 html and strips the document header, so the resulting file can be easily
                 pasted into a blogger edit window.

                 The output is written next to ``fname`` with the extension replaced by
                 '.html'; only the lines between the first line starting with
                 '<div class' and the next line starting with '</div>' are kept.

                 Returns the name of the html file written.
                 Raises IOError if rst2html writes anything to stderr.
                 """
                 # These are the rst2html options that produce the cleanest, simplest
                 # html I could find.  This should help in making it easier to paste
                 # into the blogspot html window, though I'm still having problems
                 # with linebreaks there...
                 #
                 # The command is an argument list run without a shell, so a file name
                 # containing spaces or shell metacharacters is passed through verbatim
                 # instead of being re-parsed (or executed) by the shell.
                 cmd = ["rst2html", "--link-stylesheet", "--no-xml-declaration",
                        "--no-generator", "--no-datestamp", "--no-source-link",
                        "--no-toc-backlinks", "--no-section-numbering",
                        "--strip-comments", fname]
                 proc = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                 html, stderr = proc.communicate()
                 if stderr:
                     raise IOError(stderr)
                 # Make an iterator so breaking out holds state.  Our implementation of
                 # searching for the html body below is basically a trivial little state
                 # machine, so we need this.
                 walker = iter(html.splitlines())
                 # Find start of main text, break out to then print until we find end /div.
                 # This may only work if there's a real title defined so we get a 'div class'
                 # tag, I haven't really tried.
                 # The pipe yields bytes, so the markers are bytes literals and the
                 # output file is binary; this behaves the same on Python 2 and 3.
                 for line in walker:
                     if line.startswith(b'<div class'):
                         break
                 newfname = os.path.splitext(fname)[0] + '.html'
                 with open(newfname, 'wb') as f:
                     for line in walker:
                         if line.startswith(b'</div>'):
                             break
                         f.write(line)
                         f.write(b'\n')
                 return newfname
             def main(fname):
                 """Run the whole pipeline: notebook -> rst -> simplified html."""
                 rst_name = nb2rst(fname)
                 rst2simplehtml(rst_name)
             if __name__ == '__main__':
                 # Exit with a usage hint rather than a bare IndexError when the
                 # notebook path is missing from the command line.
                 if len(sys.argv) < 2:
                     sys.exit('Usage: %s file.ipynb' % sys.argv[0])
                 main(sys.argv[1])