upstream/ipython Commit - r6228:3225948d

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

"""A really simple notebook to rst/html exporter.

2

"""A really simple notebook to rst/html exporter.

3

4

Usage

4

Usage

5

6

./nb2html.py file.ipynb

6

./nb2html.py file.ipynb

7

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

9

called nb_figure_NN.png.

9

called nb_figure_NN.png.

10

11

"""

11

"""

12

13

import os

13

import os

14

import subprocess

14

import subprocess

15

import sys

15

import sys

16

17

from IPython.nbformat import current as nbformat

17

from IPython.nbformat import current as nbformat

18

from IPython.utils.text import wrap_paragraphs, indent

18

from IPython.utils.text import wrap_paragraphs, indent

19

20

21

# Cell converters

21

# Cell converters

22

23

def unknown_cell(cell):

23

def unknown_cell(cell):

24

"""Default converter for cells of unknown type.

24

"""Default converter for cells of unknown type.

25

"""

25

"""

26

27

return rst_directive('.. warning:: Unknown cell') + \

27

return rst_directive('.. warning:: Unknown cell') + \

28

[repr(cell)]

28

[repr(cell)]

29

30

def heading_cell(cell):

31

"""convert a heading cell to rst

32

33

Returns list."""

34

heading_level = {1:'=', 2:'-', 3:'`', 4:'\'', 5:'.',6:'~'}

35

marker = heading_level[cell.level]

36

return ['{0}\n{1}\n'.format(cell.source, marker*len(cell.source))]

37

30

def markdown_cell(cell):

38

def markdown_cell(cell):

31

"""convert a markdown cell to rst

39

"""convert a markdown cell to rst

32

40

33

Returns list."""

41

Returns list."""

34

return [cell.source]

42

return [cell.source]

35

43

36

44

37

def rst_directive(directive, text=''):

45

def rst_directive(directive, text=''):

38

out = [directive, '']

46

out = [directive, '']

39

if text:

47

if text:

40

out.extend([indent(text), ''])

48

out.extend([indent(text), ''])

41

return out

49

return out

42

50

43

def code_cell(cell):

51

def code_cell(cell):

44

"""Convert a code cell to rst

52

"""Convert a code cell to rst

45

53

46

Returns list."""

54

Returns list."""

47

55

48

if not cell.input:

56

if not cell.input:

49

return []

57

return []

50

58

51

lines = ['In[%s]:' % cell.prompt_number, '']

59

lines = ['In[%s]:' % cell.prompt_number, '']

52

lines.extend(rst_directive('.. code:: python', cell.input))

60

lines.extend(rst_directive('.. code:: python', cell.input))

53

61

54

for output in cell.outputs:

62

for output in cell.outputs:

55

conv = converters.get(output.output_type, unknown_cell)

63

conv = converters.get(output.output_type, unknown_cell)

56

lines.extend(conv(output))

64

lines.extend(conv(output))

57

65

58

return lines

66

return lines

59

67

60

# Converters for parts of a cell.

68

# Converters for parts of a cell.

61

figures_counter = 1

69

figures_counter = 1

62

70

63

def out_display(output):

71

def out_display(output):

64

"""convert display data from the output of a code cell to rst.

72

"""convert display data from the output of a code cell to rst.

65

73

66

Returns list.

74

Returns list.

67

"""

75

"""

68

global figures_counter

76

global figures_counter

69

77

70

lines = []

78

lines = []

71

79

72

if 'png' in output:

80

if 'png' in output:

73

fname = 'nb_figure_%s.png' % figures_counter

81

fname = 'nb_figure_%s.png' % figures_counter

74

with open(fname, 'w') as f:

82

with open(fname, 'w') as f:

75

f.write(output.png.decode('base64'))

83

f.write(output.png.decode('base64'))

76

84

77

figures_counter += 1

85

figures_counter += 1

78

lines.append('.. image:: %s' % fname)

86

lines.append('.. image:: %s' % fname)

79

lines.append('')

87

lines.append('')

80

88

81

return lines

89

return lines

82

90

83

91

84

def out_pyout(output):

92

def out_pyout(output):

85

"""convert pyout part of a code cell to rst

93

"""convert pyout part of a code cell to rst

86

94

87

Returns list."""

95

Returns list."""

88

96

89

lines = ['Out[%s]:' % output.prompt_number, '']

97

lines = ['Out[%s]:' % output.prompt_number, '']

90

98

91

if 'latex' in output:

99

if 'latex' in output:

92

lines.extend(rst_directive('.. math::', output.latex))

100

lines.extend(rst_directive('.. math::', output.latex))

93

101

94

if 'text' in output:

102

if 'text' in output:

95

lines.extend(rst_directive('.. parsed-literal::', output.text))

103

lines.extend(rst_directive('.. parsed-literal::', output.text))

96

104

97

return lines

105

return lines

98

106

99

107

100

converters = dict(~~code~~ = ~~code~~_cell,

108

converters = dict(heading = heading_cell,

109

code = code_cell,

101

markdown = markdown_cell,

110

markdown = markdown_cell,

102

pyout = out_pyout,

111

pyout = out_pyout,

103

display_data = out_display,

112

display_data = out_display,

104

)

113

)

105

114

106

115

107

116

108

def convert_notebook(nb):

117

def convert_notebook(nb):

109

lines = []

118

lines = []

110

for cell in nb.worksheets[0].cells:

119

for cell in nb.worksheets[0].cells:

111

conv = converters.get(cell.cell_type, unknown_cell)

120

conv = converters.get(cell.cell_type, unknown_cell)

112

lines.extend(conv(cell))

121

lines.extend(conv(cell))

113

lines.append('')

122

lines.append('')

114

123

115

return '\n'.join(lines)

124

return '\n'.join(lines)

116

125

117

126

118

def nb2rst(fname):

127

def nb2rst(fname):

119

"Convert notebook to rst"

128

"Convert notebook to rst"

120

129

121

with open(fname) as f:

130

with open(fname) as f:

122

nb = nbformat.read(f, 'json')

131

nb = nbformat.read(f, 'json')

123

132

124

rst = convert_notebook(nb)

133

rst = convert_notebook(nb)

125

134

126

newfname = os.path.splitext(fname)[0] + '.rst'

135

newfname = os.path.splitext(fname)[0] + '.rst'

127

with open(newfname, 'w') as f:

136

with open(newfname, 'w') as f:

128

f.write(rst.encode('utf8'))

137

f.write(rst.encode('utf8'))

129

138

130

return newfname

139

return newfname

131

140

132

141

133

def rst2simplehtml(fname):

142

def rst2simplehtml(fname):

134

"""Convert a rst file to simplified html suitable for blogger.

143

"""Convert a rst file to simplified html suitable for blogger.

135

144

136

This just runs rst2html with certain parameters to produce really simple

145

This just runs rst2html with certain parameters to produce really simple

137

html and strips the document header, so the resulting file can be easily

146

html and strips the document header, so the resulting file can be easily

138

pasted into a blogger edit window.

147

pasted into a blogger edit window.

139

"""

148

"""

140

149

141

# This is the template for the rst2html call that produces the cleanest,

150

# This is the template for the rst2html call that produces the cleanest,

142

# simplest html I could find. This should help in making it easier to

151

# simplest html I could find. This should help in making it easier to

143

# paste into the blogspot html window, though I'm still having problems

152

# paste into the blogspot html window, though I'm still having problems

144

# with linebreaks there...

153

# with linebreaks there...

145

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

154

cmd_template = ("rst2html.py --link-stylesheet --no-xml-declaration "

146

"--no-generator --no-datestamp --no-source-link "

155

"--no-generator --no-datestamp --no-source-link "

147

"--no-toc-backlinks --no-section-numbering "

156

"--no-toc-backlinks --no-section-numbering "

148

"--strip-comments ")

157

"--strip-comments ")

149

158

150

cmd = "%s %s" % (cmd_template, fname)

159

cmd = "%s %s" % (cmd_template, fname)

151

proc = subprocess.Popen(cmd,

160

proc = subprocess.Popen(cmd,

152

stdout=subprocess.PIPE,

161

stdout=subprocess.PIPE,

153

stderr=subprocess.PIPE,

162

stderr=subprocess.PIPE,

154

shell=True)

163

shell=True)

155

html, stderr = proc.communicate()

164

html, stderr = proc.communicate()

156

if stderr:

165

if stderr:

157

raise IOError(stderr)

166

raise IOError(stderr)

158

167

159

# Make an iterator so breaking out holds state. Our implementation of

168

# Make an iterator so breaking out holds state. Our implementation of

160

# searching for the html body below is basically a trivial little state

169

# searching for the html body below is basically a trivial little state

161

# machine, so we need this.

170

# machine, so we need this.

162

walker = iter(html.splitlines())

171

walker = iter(html.splitlines())

163

172

164

# Find start of main text, break out to then print until we find end /div.

173

# Find start of main text, break out to then print until we find end /div.

165

# This may only work if there's a real title defined so we get a 'div class'

174

# This may only work if there's a real title defined so we get a 'div class'

166

# tag, I haven't really tried.

175

# tag, I haven't really tried.

167

for line in walker:

176

for line in walker:

168

if line.startswith('<~~div class~~'):

177

if line.startswith('<body>'):

169

break

178

break

170

179

171

newfname = os.path.splitext(fname)[0] + '.html'

180

newfname = os.path.splitext(fname)[0] + '.html'

172

with open(newfname, 'w') as f:

181

with open(newfname, 'w') as f:

173

for line in walker:

182

for line in walker:

174

if line.startswith('</~~div~~>'):

183

if line.startswith('</body>'):

175

break

184

break

176

f.write(line)

185

f.write(line)

177

f.write('\n')

186

f.write('\n')

178

187

179

return newfname

188

return newfname

180

189

181

190

182

def main(fname):

191

def main(fname):

183

"""Convert a notebook to html in one step"""

192

"""Convert a notebook to html in one step"""

184

newfname = nb2rst(fname)

193

newfname = nb2rst(fname)

185

rst2simplehtml(newfname)

194

rst2simplehtml(newfname)

186

195

187

196

188

if __name__ == '__main__':

197

if __name__ == '__main__':

189

main(sys.argv[1])

198

main(sys.argv[1])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """A really simple notebook to rst/html exporter.
             Usage
               ./nb2html.py file.ipynb
             Produces 'file.rst' and 'file.html', along with auto-generated figure files
             called nb_figure_NN.png.
             """
             import os
             import subprocess
             import sys
             from IPython.nbformat import current as nbformat
             from IPython.utils.text import wrap_paragraphs, indent
             # Cell converters
             def unknown_cell(cell):
                 """Fallback converter for cells whose type has no registered converter.

                 Returns a list of rst lines: a warning directive followed by the
                 ``repr`` of the cell, so the unknown content stays visible.
                 """
                 warning_lines = []
                 warning_lines.extend(rst_directive('.. warning:: Unknown cell'))
                 warning_lines.append(repr(cell))
                 return warning_lines
+            def heading_cell(cell):
+                """convert a heading cell to rst
+                Returns list."""
+                heading_level = {1:'=', 2:'-', 3:'`', 4:'\'', 5:'.',6:'~'}
+                marker = heading_level[cell.level]
+                return ['{0}\n{1}\n'.format(cell.source, marker*len(cell.source))]
             def markdown_cell(cell):
                 """Turn a markdown cell into rst lines.

                 The cell source is passed through unchanged as the single item of
                 the returned list.
                 """
                 rst_lines = []
                 rst_lines.append(cell.source)
                 return rst_lines
             def rst_directive(directive, text=''):
                 """Build the rst lines for a directive with an optional body.

                 Returns a list holding the directive line and a blank line; when
                 `text` is non-empty, the text passed through ``indent`` and a
                 trailing blank line follow.
                 """
                 if not text:
                     return [directive, '']
                 return [directive, '', indent(text), '']
             def code_cell(cell):
                 """Render a code cell (input plus all of its outputs) as rst.

                 A cell with empty input produces no lines at all. Otherwise the
                 result is the ``In[N]:`` prompt, the input as a python code
                 directive, then whatever each output's converter returns (outputs of
                 unregistered types go through ``unknown_cell``).

                 Returns list.
                 """
                 source = cell.input
                 if not source:
                     return []
                 rst = ['In[%s]:' % cell.prompt_number, '']
                 rst += rst_directive('.. code:: python', source)
                 for out in cell.outputs:
                     handler = converters.get(out.output_type, unknown_cell)
                     rst += handler(out)
                 return rst
             # Converters for parts of a cell.
             # Number used in the name of the next figure file; out_display reads it
             # for 'nb_figure_<N>.png' and increments it after writing each image.
             figures_counter = 1
             def out_display(output):
                 """Convert display data from the output of a code cell to rst.

                 When the output has a 'png' entry, its base64 payload is decoded and
                 written to 'nb_figure_<N>.png' in the current working directory,
                 where N is the module-level ``figures_counter`` (incremented after
                 each file is written). An rst image directive pointing at that file
                 is emitted. Outputs without a 'png' entry produce no lines.

                 Returns list.
                 """
                 global figures_counter
                 # Local import keeps this block self-contained.
                 import base64
                 lines = []
                 if 'png' in output:
                     fname = 'nb_figure_%s.png' % figures_counter
                     # PNG data is binary: 'wb' avoids newline translation corrupting
                     # the image, and b64decode works on both Python 2 and 3 (the
                     # 'base64' str codec only exists on Python 2).
                     with open(fname, 'wb') as f:
                         f.write(base64.b64decode(output.png))
                     figures_counter += 1
                     lines.append('.. image:: %s' % fname)
                     lines.append('')
                 return lines
             def out_pyout(output):
                 """Render the pyout (execution result) part of a code cell as rst.

                 Emits the ``Out[N]:`` prompt, then a math directive for a 'latex'
                 entry and a parsed-literal directive for a 'text' entry, in that
                 order, for whichever of the two the output contains.

                 Returns list.
                 """
                 rst = ['Out[%s]:' % output.prompt_number, '']
                 directive_for = (
                     ('latex', '.. math::'),
                     ('text', '.. parsed-literal::'),
                 )
                 for key, directive in directive_for:
                     if key in output:
                         rst.extend(rst_directive(directive, getattr(output, key)))
                 return rst
-            converters = dict(code = code_cell,
+            converters = dict(heading = heading_cell,
+                              code = code_cell,
                               markdown = markdown_cell,
                               pyout = out_pyout,
                               display_data = out_display,
                 )
             def convert_notebook(nb):
                 """Convert all cells of a notebook to one rst string.

                 Every worksheet in ``nb.worksheets`` is walked in order (not just
                 the first one), so a notebook without worksheets yields an empty
                 string instead of raising IndexError, and cells in later worksheets
                 are no longer dropped. Each cell is rendered by the converter
                 registered for its ``cell_type`` (``unknown_cell`` when none is
                 registered) and followed by a blank line.

                 Returns the rst lines joined with newlines.
                 """
                 lines = []
                 for worksheet in nb.worksheets:
                     for cell in worksheet.cells:
                         conv = converters.get(cell.cell_type, unknown_cell)
                         lines.extend(conv(cell))
                         lines.append('')
                 return '\n'.join(lines)
             def nb2rst(fname):
                 """Convert the notebook file `fname` to rst.

                 The notebook is read as json, converted with ``convert_notebook`` and
                 written utf8-encoded next to the input, with the extension replaced
                 by '.rst'.

                 Returns the name of the rst file that was written.
                 """
                 with open(fname) as nb_file:
                     notebook = nbformat.read(nb_file, 'json')
                 rst_text = convert_notebook(notebook)
                 base, _ext = os.path.splitext(fname)
                 rst_path = base + '.rst'
                 with open(rst_path, 'w') as rst_file:
                     rst_file.write(rst_text.encode('utf8'))
                 return rst_path
             def rst2simplehtml(fname):
                 """Convert a rst file to simplified html suitable for blogger.
                 This just runs rst2html with certain parameters to produce really simple
                 html and strips the document header, so the resulting file can be easily
                 pasted into a blogger edit window.
                 """
                 # This is the template for the rst2html call that produces the cleanest,
                 # simplest html I could find.  This should help in making it easier to
                 # paste into the blogspot html window, though I'm still having problems
                 # with linebreaks there...
-                cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "
+                cmd_template = ("rst2html.py --link-stylesheet --no-xml-declaration "
                                 "--no-generator --no-datestamp --no-source-link "
                                 "--no-toc-backlinks --no-section-numbering "
                                 "--strip-comments ")
                 cmd = "%s %s" % (cmd_template, fname)
                 proc = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE,
                                         shell=True)
                 html, stderr = proc.communicate()
                 if stderr:
                     raise IOError(stderr)
                 # Make an iterator so breaking out holds state.  Our implementation of
                 # searching for the html body below is basically a trivial little state
                 # machine, so we need this.
                 walker = iter(html.splitlines())
                 # Find start of main text, break out to then print until we find end /div.
                 # This may only work if there's a real title defined so we get a 'div class'
                 # tag, I haven't really tried.
                 for line in walker:
-                    if line.startswith('<div class'):
+                    if line.startswith('<body>'):
                         break
                 newfname = os.path.splitext(fname)[0] + '.html'
                 with open(newfname, 'w') as f:
                     for line in walker:
-                        if line.startswith('</div>'):
+                        if line.startswith('</body>'):
                             break
                         f.write(line)
                         f.write('\n')
                 return newfname
             def main(fname):
                 """Run the whole pipeline on `fname`: notebook -> rst -> simple html."""
                 rst2simplehtml(nb2rst(fname))
             if __name__ == '__main__':
                 # The notebook to convert is the first command-line argument.
                 notebook_path = sys.argv[1]
                 main(notebook_path)