upstream/ipython Commit - r6253:68196bb0

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

"""A really simple notebook to rst/html exporter.

2

"""A really simple notebook to rst/html exporter.

3

4

Usage

4

Usage

5

6

./nb2html.py file.ipynb

6

./nb2html.py file.ipynb

7

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

9

called nb_figure_NN.png.

9

called nb_figure_NN.png.

10

11

"""

11

"""

12

13

import os

13

import os

14

import subprocess

14

import subprocess

15

import sys

15

import sys

16

17

from IPython.nbformat import current as nbformat

17

from IPython.nbformat import current as nbformat

18

from IPython.utils.text import wrap_paragraphs, indent

18

from IPython.utils.text import wrap_paragraphs, indent

19

20

21

# Cell converters

21

# Cell converters

22

23

def unknown_cell(cell):

23

def unknown_cell(cell):

24

"""Default converter for cells of unknown type.

24

"""Default converter for cells of unknown type.

25

"""

25

"""

26

27

return rst_directive('.. warning:: Unknown cell') + \

27

return rst_directive('.. warning:: Unknown cell') + \

28

[repr(cell)]

28

[repr(cell)]

29

30

31

def rst_directive(directive, text=''):

31

def rst_directive(directive, text=''):

32

out = [directive, '']

32

out = [directive, '']

33

if text:

33

if text:

34

out.extend([indent(text), ''])

34

out.extend([indent(text), ''])

35

return out

35

return out

36

37

# Converters for parts of a cell.

37

# Converters for parts of a cell.

38

figures_counter = 1

38

figures_counter = 1

39

40

class ConversionException(Exception):

41

class ConversionException(Exception):

41

pass

42

pass

42

43

44

43

class Converter(object):

45

class Converter(object):

44

default_encoding = 'utf-8'

46

default_encoding = 'utf-8'

47

45

def __init__(self, fname):

48

def __init__(self, fname):

46

self.fname = fname

49

self.fname = fname

47

50

48

@property

51

@property

49

def extension(self):

52

def extension(self):

50

raise ConversionException("""extension must be defined in Converter

53

raise ConversionException("""extension must be defined in Converter

51

subclass""")

54

subclass""")

52

55

53

def dispatch(self,cell_type):

56

def dispatch(self, cell_type):

54

"""return cell_type dependent render method, for example render_code

57

"""return cell_type dependent render method, for example render_code

55

"""

58

"""

56

return getattr(self, 'render_'+cell_type, unknown_cell)

59

return getattr(self, 'render_' + cell_type, unknown_cell)

57

60

58

def convert(self):

61

def convert(self):

59

lines = []

62

lines = []

60

for cell in self.nb.worksheets[0].cells:

63

for cell in self.nb.worksheets[0].cells:

61

conv_fn = self.dispatch(cell.cell_type)

64

conv_fn = self.dispatch(cell.cell_type)

62

lines.extend(conv_fn(cell))

65

lines.extend(conv_fn(cell))

63

lines.append('')

66

lines.append('')

64

return '\n'.join(lines)

67

return '\n'.join(lines)

65

68

66

def render(self):

69

def render(self):

67

"read, convert, and save self.fname"

70

"read, convert, and save self.fname"

68

self.read()

71

self.read()

69

self.output = self.convert()

72

self.output = self.convert()

70

return self.save()

73

return self.save()

71

74

72

def read(self):

75

def read(self):

73

"read and parse notebook into NotebookNode called self.nb"

76

"read and parse notebook into NotebookNode called self.nb"

74

with open(self.fname) as f:

77

with open(self.fname) as f:

75

self.nb = nbformat.read(f, 'json')

78

self.nb = nbformat.read(f, 'json')

76

79

77

def save(self,fname=None, encoding=None):

80

def save(self, fname=None, encoding=None):

78

"read and parse notebook into self.nb"

81

"read and parse notebook into self.nb"

79

if fname is None:

82

if fname is None:

80

fname = os.path.splitext(self.fname)[0] + '.' + self.extension

83

fname = os.path.splitext(self.fname)[0] + '.' + self.extension

81

if encoding is None:

84

if encoding is None:

82

encoding = self.default_encoding

85

encoding = self.default_encoding

83

with open(fname, 'w') as f:

86

with open(fname, 'w') as f:

84

f.write(self.output.encode(encoding))

87

f.write(self.output.encode(encoding))

85

return fname

88

return fname

86

89

87

def render_heading(self,cell):

90

def render_heading(self, cell):

88

raise NotImplementedError

91

raise NotImplementedError

92

93

def render_code(self, cell):

94

raise NotImplementedError

89

95

90

def render_~~code~~(self,cell):

96

def render_markdown(self, cell):

91

raise NotImplementedError

97

raise NotImplementedError

92

98

93

def render_~~markdown~~(self,cell):

99

def render_pyout(self, cell):

94

raise NotImplementedError

100

raise NotImplementedError

95

101

96

def render_~~pyout~~(self,cell):

102

def render_display_data(self, cell):

97

raise NotImplementedError

103

raise NotImplementedError

98

104

99

def render_dis~~play_da~~ta(self,cell):

105

def render_stream(self, cell):

100

raise NotImplementedError

106

raise NotImplementedError

101

107

102

def render_stream(self,cell):

103

raise NotImplementedError

104

108

105

class ConverterRST(Converter):

109

class ConverterRST(Converter):

106

extension = 'rst'

110

extension = 'rst'

107

def render_heading(self,cell):

111

112

def render_heading(self, cell):

108

"""convert a heading cell to rst

113

"""convert a heading cell to rst

109

114

110

Returns list."""

115

Returns list."""

111

heading_level = {1:'=', 2:'-', 3:'`', 4:'\'', 5:'.',6:'~'}

116

heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

112

marker = heading_level[cell.level]

117

marker = heading_level[cell.level]

113

return ['{0}\n{1}\n'.format(cell.source, marker*len(cell.source))]

118

return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

114

119

115

def render_code(self,cell):

120

def render_code(self, cell):

116

"""Convert a code cell to rst

121

"""Convert a code cell to rst

117

122

118

Returns list."""

123

Returns list."""

119

124

120

if not cell.input:

125

if not cell.input:

121

return []

126

return []

122

127

123

lines = ['In[%s]:' % cell.prompt_number, '']

128

lines = ['In[%s]:' % cell.prompt_number, '']

124

lines.extend(rst_directive('.. code:: python', cell.input))

129

lines.extend(rst_directive('.. code:: python', cell.input))

125

130

126

for output in cell.outputs:

131

for output in cell.outputs:

127

conv_fn = self.dispatch(output.output_type)

132

conv_fn = self.dispatch(output.output_type)

128

lines.extend(conv_fn(output))

133

lines.extend(conv_fn(output))

129

134

130

return lines

135

return lines

131

136

132

def render_markdown(self,cell):

137

def render_markdown(self, cell):

133

"""convert a markdown cell to rst

138

"""convert a markdown cell to rst

134

139

135

Returns list."""

140

Returns list."""

136

return [cell.source]

141

return [cell.source]

137

142

138

def render_pyout(self,output):

143

def render_pyout(self, output):

139

"""convert pyout part of a code cell to rst

144

"""convert pyout part of a code cell to rst

140

145

141

Returns list."""

146

Returns list."""

142

147

143

lines = ['Out[%s]:' % output.prompt_number, '']

148

lines = ['Out[%s]:' % output.prompt_number, '']

144

149

145

# output is a dictionary like object with type as a key

150

# output is a dictionary like object with type as a key

146

if 'latex' in output:

151

if 'latex' in output:

147

lines.extend(rst_directive('.. math::', output.latex))

152

lines.extend(rst_directive('.. math::', output.latex))

148

153

149

if 'text' in output:

154

if 'text' in output:

150

lines.extend(rst_directive('.. parsed-literal::', output.text))

155

lines.extend(rst_directive('.. parsed-literal::', output.text))

151

156

152

return lines

157

return lines

153

158

154

def render_display_data(self,output):

159

def render_display_data(self, output):

155

"""convert display data from the output of a code cell to rst.

160

"""convert display data from the output of a code cell to rst.

156

161

157

Returns list.

162

Returns list.

158

"""

163

"""

159

global figures_counter

164

global figures_counter

160

165

161

lines = []

166

lines = []

162

167

163

if 'png' in output:

168

if 'png' in output:

164

fname = 'nb_figure_%s.png' % figures_counter

169

fname = 'nb_figure_%s.png' % figures_counter

165

with open(fname, 'w') as f:

170

with open(fname, 'w') as f:

166

f.write(output.png.decode('base64'))

171

f.write(output.png.decode('base64'))

167

172

168

figures_counter += 1

173

figures_counter += 1

169

lines.append('.. image:: %s' % fname)

174

lines.append('.. image:: %s' % fname)

170

lines.append('')

175

lines.append('')

171

176

172

return lines

177

return lines

173

178

174

def render_stream(self,output):

179

def render_stream(self, output):

175

"""convert stream part of a code cell to rst

180

"""convert stream part of a code cell to rst

176

181

177

Returns list."""

182

Returns list."""

178

183

179

lines = []

184

lines = []

180

185

181

if 'text' in output:

186

if 'text' in output:

182

lines.extend(rst_directive('.. parsed-literal::', output.text))

187

lines.extend(rst_directive('.. parsed-literal::', output.text))

183

188

184

return lines

189

return lines

185

190

191

186

def rst2simplehtml(fname):

192

def rst2simplehtml(fname):

187

"""Convert a rst file to simplified html suitable for blogger.

193

"""Convert a rst file to simplified html suitable for blogger.

188

194

189

This just runs rst2html with certain parameters to produce really simple

195

This just runs rst2html with certain parameters to produce really simple

190

html and strips the document header, so the resulting file can be easily

196

html and strips the document header, so the resulting file can be easily

191

pasted into a blogger edit window.

197

pasted into a blogger edit window.

192

"""

198

"""

193

199

194

# This is the template for the rst2html call that produces the cleanest,

200

# This is the template for the rst2html call that produces the cleanest,

195

# simplest html I could find. This should help in making it easier to

201

# simplest html I could find. This should help in making it easier to

196

# paste into the blogspot html window, though I'm still having problems

202

# paste into the blogspot html window, though I'm still having problems

197

# with linebreaks there...

203

# with linebreaks there...

198

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

204

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

199

"--no-generator --no-datestamp --no-source-link "

205

"--no-generator --no-datestamp --no-source-link "

200

"--no-toc-backlinks --no-section-numbering "

206

"--no-toc-backlinks --no-section-numbering "

201

"--strip-comments ")

207

"--strip-comments ")

202

208

203

cmd = "%s %s" % (cmd_template, fname)

209

cmd = "%s %s" % (cmd_template, fname)

204

proc = subprocess.Popen(cmd,

210

proc = subprocess.Popen(cmd,

205

stdout=subprocess.PIPE,

211

stdout=subprocess.PIPE,

206

stderr=subprocess.PIPE,

212

stderr=subprocess.PIPE,

207

shell=True)

213

shell=True)

208

html, stderr = proc.communicate()

214

html, stderr = proc.communicate()

209

if stderr:

215

if stderr:

210

raise IOError(stderr)

216

raise IOError(stderr)

211

217

212

# Make an iterator so breaking out holds state. Our implementation of

218

# Make an iterator so breaking out holds state. Our implementation of

213

# searching for the html body below is basically a trivial little state

219

# searching for the html body below is basically a trivial little state

214

# machine, so we need this.

220

# machine, so we need this.

215

walker = iter(html.splitlines())

221

walker = iter(html.splitlines())

216

222

217

# Find start of main text, break out to then print until we find end /div.

223

# Find start of main text, break out to then print until we find end /div.

218

# This may only work if there's a real title defined so we get a 'div class'

224

# This may only work if there's a real title defined so we get a 'div class'

219

# tag, I haven't really tried.

225

# tag, I haven't really tried.

220

for line in walker:

226

for line in walker:

221

if line.startswith('<body>'):

227

if line.startswith('<body>'):

222

break

228

break

223

229

224

newfname = os.path.splitext(fname)[0] + '.html'

230

newfname = os.path.splitext(fname)[0] + '.html'

225

with open(newfname, 'w') as f:

231

with open(newfname, 'w') as f:

226

for line in walker:

232

for line in walker:

227

if line.startswith('</body>'):

233

if line.startswith('</body>'):

228

break

234

break

229

f.write(line)

235

f.write(line)

230

f.write('\n')

236

f.write('\n')

231

237

232

return newfname

238

return newfname

233

239

234

240

235

def main(fname):

241

def main(fname):

236

"""Convert a notebook to html in one step"""

242

"""Convert a notebook to html in one step"""

237

converter = ConverterRST(fname)

243

converter = ConverterRST(fname)

238

converter.render()

244

converter.render()

239

245

240

246

241

if __name__ == '__main__':

247

if __name__ == '__main__':

242

main(sys.argv[1])

248

main(sys.argv[1])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """A really simple notebook to rst/html exporter.
             Usage
               ./nb2html.py file.ipynb
             Produces 'file.rst' and 'file.html', along with auto-generated figure files
             called nb_figure_NN.png.
             """
             import os
             import subprocess
             import sys
             from IPython.nbformat import current as nbformat
             from IPython.utils.text import wrap_paragraphs, indent
             # Cell converters
             def unknown_cell(cell):
                 """Fallback converter used when a cell type has no render method.

                 Returns a list of rst lines: a warning directive followed by the
                 repr of the cell that could not be converted.
                 """
                 warning_lines = rst_directive('.. warning:: Unknown cell')
                 return warning_lines + [repr(cell)]
             def rst_directive(directive, text=''):
                 """Build the rst lines for a directive with an optional body.

                 Returns a list holding the directive line and a blank line.  When
                 `text` is non-empty, the indented text and one more blank line are
                 appended.
                 """
                 if not text:
                     return [directive, '']
                 return [directive, '', indent(text), '']
             # Converters for parts of a cell.
             # Running index used to name the nb_figure_NN.png files written by
             # render_display_data; it is incremented after each figure is saved.
             figures_counter = 1
             class ConversionException(Exception):
                 """Error raised for converter problems, e.g. a Converter subclass
                 that does not define `extension`."""
             class Converter(object):
                 default_encoding = 'utf-8'
                 def __init__(self, fname):
                     self.fname = fname
                 @property
                 def extension(self):
                     raise ConversionException("""extension must be defined in Converter
                             subclass""")
-                def dispatch(self,cell_type):
+                def dispatch(self, cell_type):
                     """return cell_type dependent render method,  for example render_code
                     """
-                    return getattr(self, 'render_'+cell_type, unknown_cell)
+                    return getattr(self, 'render_' + cell_type, unknown_cell)
                 def convert(self):
                     lines = []
                     for cell in self.nb.worksheets[0].cells:
                         conv_fn = self.dispatch(cell.cell_type)
                         lines.extend(conv_fn(cell))
                         lines.append('')
                     return '\n'.join(lines)
                 def render(self):
                     "read, convert, and save self.fname"
                     self.read()
                     self.output = self.convert()
                     return self.save()
                 def read(self):
                     "read and parse notebook into NotebookNode called self.nb"
                     with open(self.fname) as f:
                         self.nb = nbformat.read(f, 'json')
-                def save(self,fname=None, encoding=None):
+                def save(self, fname=None, encoding=None):
                     "encode self.output and write it to fname; return the file name"
                     if fname is None:
                         fname = os.path.splitext(self.fname)[0] + '.' + self.extension
                     if encoding is None:
                         encoding = self.default_encoding
                     with open(fname, 'w') as f:
                         f.write(self.output.encode(encoding))
                     return fname
-                def render_heading(self,cell):
+                def render_heading(self, cell):
-                     raise NotImplementedError
+                    raise NotImplementedError
+                def render_code(self, cell):
+                    raise NotImplementedError
-                def render_code(self,cell):
+                def render_markdown(self, cell):
-                     raise NotImplementedError
+                    raise NotImplementedError
-                def render_markdown(self,cell):
+                def render_pyout(self, cell):
-                     raise NotImplementedError
+                    raise NotImplementedError
-                def render_pyout(self,cell):
+                def render_display_data(self, cell):
-                     raise NotImplementedError
+                    raise NotImplementedError
-                def render_display_data(self,cell):
+                def render_stream(self, cell):
-                     raise NotImplementedError
+                    raise NotImplementedError
-                def render_stream(self,cell):
-                     raise NotImplementedError
             class ConverterRST(Converter):
                 extension = 'rst'
-                def render_heading(self,cell):
+                def render_heading(self, cell):
                     """convert a heading cell to rst
                     Returns list."""
-                    heading_level = {1:'=', 2:'-', 3:'`', 4:'\'', 5:'.',6:'~'}
+                    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
                     marker = heading_level[cell.level]
-                    return ['{0}\n{1}\n'.format(cell.source, marker*len(cell.source))]
+                    return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]
-                def render_code(self,cell):
+                def render_code(self, cell):
                     """Convert a code cell to rst
                     Returns list."""
                     if not cell.input:
                         return []
                     lines = ['In[%s]:' % cell.prompt_number, '']
                     lines.extend(rst_directive('.. code:: python', cell.input))
                     for output in cell.outputs:
                         conv_fn = self.dispatch(output.output_type)
                         lines.extend(conv_fn(output))
                     return lines
-                def render_markdown(self,cell):
+                def render_markdown(self, cell):
                     """convert a markdown cell to rst
                     Returns list."""
                     return [cell.source]
-                def render_pyout(self,output):
+                def render_pyout(self, output):
                     """convert pyout part of a code cell to rst
                     Returns list."""
                     lines = ['Out[%s]:' % output.prompt_number, '']
                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         lines.extend(rst_directive('.. math::', output.latex))
                     if 'text' in output:
                         lines.extend(rst_directive('.. parsed-literal::', output.text))
                     return lines
-                def render_display_data(self,output):
+                def render_display_data(self, output):
                     """convert display data from the output of a code cell to rst.
                     Returns list.
                     """
                     global figures_counter
                     lines = []
                     if 'png' in output:
                         fname = 'nb_figure_%s.png' % figures_counter
                         with open(fname, 'w') as f:
                             f.write(output.png.decode('base64'))
                         figures_counter += 1
                         lines.append('.. image:: %s' % fname)
                         lines.append('')
                     return lines
-                def render_stream(self,output):
+                def render_stream(self, output):
                     """convert stream part of a code cell to rst
                     Returns list."""
                     lines = []
                     if 'text' in output:
                         lines.extend(rst_directive('.. parsed-literal::', output.text))
                     return lines
             def rst2simplehtml(fname):
                 """Convert a rst file to simplified html suitable for blogger.

                 This runs rst2html with certain parameters to produce really simple
                 html and keeps only the lines between the '<body>' and '</body>'
                 lines, so the resulting file can be easily pasted into a blogger
                 edit window.

                 The html is written next to `fname`, with its extension replaced by
                 '.html'.

                 Returns the name of the html file that was written.

                 Raises IOError if rst2html writes anything to stderr.
                 """
                 # These are the rst2html options that produce the cleanest, simplest
                 # html I could find.  This should help in making it easier to paste
                 # into the blogspot html window, though I'm still having problems
                 # with linebreaks there...
                 #
                 # The command is an argument list (no shell) so that file names with
                 # spaces or shell metacharacters reach rst2html unchanged.
                 cmd = ['rst2html',
                        '--link-stylesheet', '--no-xml-declaration',
                        '--no-generator', '--no-datestamp', '--no-source-link',
                        '--no-toc-backlinks', '--no-section-numbering',
                        '--strip-comments',
                        fname]
                 proc = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                 html, stderr = proc.communicate()
                 if stderr:
                     raise IOError(stderr)

                 # Make an iterator so breaking out holds state.  Our implementation of
                 # searching for the html body below is basically a trivial little state
                 # machine, so we need this.
                 walker = iter(html.splitlines())

                 # Skip everything up to and including the line that opens the body.
                 for line in walker:
                     if line.startswith('<body>'):
                         break

                 # Copy the remaining lines until the line that closes the body.
                 newfname = os.path.splitext(fname)[0] + '.html'
                 with open(newfname, 'w') as f:
                     for line in walker:
                         if line.startswith('</body>'):
                             break
                         f.write(line)
                         f.write('\n')

                 return newfname
             def main(fname):
                 """Convert the notebook `fname` to rst in one step.

                 Only the ConverterRST pass runs here; rst2simplehtml is not invoked,
                 so no html file is produced by this function.
                 """
                 ConverterRST(fname).render()
             # Script entry point: convert the notebook named by the first
             # command-line argument.
             if __name__ == '__main__':
                 main(sys.argv[1])