upstream/ipython Commit - r6257:5a47132f

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

"""A really simple notebook to rst/html exporter.

2

"""A really simple notebook to rst/html exporter.

3

4

Usage

4

Usage

5

6

./nb2html.py file.ipynb

6

./nb2html.py file.ipynb

7

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

8

Produces 'file.rst' and 'file.html', along with auto-generated figure files

9

called nb_figure_NN.png.

9

called nb_figure_NN.png.

10

11

"""

11

"""

12

13

import os

13

import os

14

import subprocess

14

import subprocess

15

import sys

15

import sys

16

17

from IPython.nbformat import current as nbformat

17

from IPython.nbformat import current as nbformat

18

from IPython.utils.text import ~~wrap_paragraphs~~, indent

18

from IPython.utils.text import indent

19

20

21

# Cell converters

21

# Cell converters

22

23

def unknown_cell(cell):

23

def unknown_cell(cell):

24

"""Default converter for cells of unknown type.

24

"""Default converter for cells of unknown type.

25

"""

25

"""

26

27

return rst_directive('.. warning:: Unknown cell') + \

27

return rst_directive('.. warning:: Unknown cell') + \

28

[repr(cell)]

28

[repr(cell)]

29

30

31

def rst_directive(directive, text=''):

31

def rst_directive(directive, text=''):

32

out = [directive, '']

32

out = [directive, '']

33

if text:

33

if text:

34

out.extend([indent(text), ''])

34

out.extend([indent(text), ''])

35

return out

35

return out

36

37

# Converters for parts of a cell.

37

# Converters for parts of a cell.

38

39

class ConversionException(Exception):

39

class ConversionException(Exception):

40

pass

40

pass

41

42

43

class Converter(object):

43

class Converter(object):

44

default_encoding = 'utf-8'

44

default_encoding = 'utf-8'

45

46

def __init__(self, fname):

46

def __init__(self, fname):

47

self.fname = fname

47

self.fname = fname

48

self.dirpath = os.path.dirname(fname)

48

self.dirpath = os.path.dirname(fname)

49

50

@property

50

@property

51

def extension(self):

51

def extension(self):

52

raise ConversionException("""extension must be defined in Converter

52

raise ConversionException("""extension must be defined in Converter

53

subclass""")

53

subclass""")

54

55

def dispatch(self, cell_type):

55

def dispatch(self, cell_type):

56

"""return cell_type dependent render method, for example render_code

56

"""return cell_type dependent render method, for example render_code

57

"""

57

"""

58

return getattr(self, 'render_' + cell_type, unknown_cell)

58

return getattr(self, 'render_' + cell_type, unknown_cell)

59

60

def convert(self):

60

def convert(self):

61

lines = []

61

lines = []

62

for cell in self.nb.worksheets[0].cells:

62

for cell in self.nb.worksheets[0].cells:

63

conv_fn = self.dispatch(cell.cell_type)

63

conv_fn = self.dispatch(cell.cell_type)

64

lines.extend(conv_fn(cell))

64

lines.extend(conv_fn(cell))

65

lines.append('')

65

lines.append('')

66

return '\n'.join(lines)

66

return '\n'.join(lines)

67

68

def render(self):

68

def render(self):

69

"read, convert, and save self.fname"

69

"read, convert, and save self.fname"

70

self.read()

70

self.read()

71

self.output = self.convert()

71

self.output = self.convert()

72

return self.save()

72

return self.save()

73

74

def read(self):

74

def read(self):

75

"read and parse notebook into NotebookNode called self.nb"

75

"read and parse notebook into NotebookNode called self.nb"

76

with open(self.fname) as f:

76

with open(self.fname) as f:

77

self.nb = nbformat.read(f, 'json')

77

self.nb = nbformat.read(f, 'json')

78

79

def save(self, fname=None, encoding=None):

79

def save(self, fname=None, encoding=None):

80

"read and parse notebook into self.nb"

80

"read and parse notebook into self.nb"

81

if fname is None:

81

if fname is None:

82

fname = os.path.splitext(self.fname)[0] + '.' + self.extension

82

fname = os.path.splitext(self.fname)[0] + '.' + self.extension

83

if encoding is None:

83

if encoding is None:

84

encoding = self.default_encoding

84

encoding = self.default_encoding

85

with open(fname, 'w') as f:

85

with open(fname, 'w') as f:

86

f.write(self.output.encode(encoding))

86

f.write(self.output.encode(encoding))

87

return fname

87

return fname

88

89

def render_heading(self, cell):

89

def render_heading(self, cell):

90

raise NotImplementedError

90

raise NotImplementedError

91

92

def render_code(self, cell):

92

def render_code(self, cell):

93

raise NotImplementedError

93

raise NotImplementedError

94

95

def render_markdown(self, cell):

95

def render_markdown(self, cell):

96

raise NotImplementedError

96

raise NotImplementedError

97

98

def render_pyout(self, cell):

98

def render_pyout(self, cell):

99

raise NotImplementedError

99

raise NotImplementedError

100

101

def render_display_data(self, cell):

101

def render_display_data(self, cell):

102

raise NotImplementedError

102

raise NotImplementedError

103

104

def render_stream(self, cell):

104

def render_stream(self, cell):

105

raise NotImplementedError

105

raise NotImplementedError

106

107

108

class ConverterRST(Converter):

108

class ConverterRST(Converter):

109

extension = 'rst'

109

extension = 'rst'

110

figures_counter = 0

110

figures_counter = 0

111

112

def render_heading(self, cell):

112

def render_heading(self, cell):

113

"""convert a heading cell to rst

113

"""convert a heading cell to rst

114

115

Returns list."""

115

Returns list."""

116

heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

116

heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

117

marker = heading_level[cell.level]

117

marker = heading_level[cell.level]

118

return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

118

return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

119

120

def render_code(self, cell):

120

def render_code(self, cell):

121

"""Convert a code cell to rst

121

"""Convert a code cell to rst

122

123

Returns list."""

123

Returns list."""

124

125

if not cell.input:

125

if not cell.input:

126

return []

126

return []

127

128

lines = ['In[%s]:' % cell.prompt_number, '']

128

lines = ['In[%s]:' % cell.prompt_number, '']

129

lines.extend(rst_directive('.. code:: python', cell.input))

129

lines.extend(rst_directive('.. code:: python', cell.input))

130

131

for output in cell.outputs:

131

for output in cell.outputs:

132

conv_fn = self.dispatch(output.output_type)

132

conv_fn = self.dispatch(output.output_type)

133

lines.extend(conv_fn(output))

133

lines.extend(conv_fn(output))

134

135

return lines

135

return lines

136

137

def render_markdown(self, cell):

137

def render_markdown(self, cell):

138

"""convert a markdown cell to rst

138

"""convert a markdown cell to rst

139

140

Returns list."""

140

Returns list."""

141

return [cell.source]

141

return [cell.source]

142

143

def render_plaintext(self, cell):

143

def render_plaintext(self, cell):

144

"""convert plain text to rst

144

"""convert plain text to rst

145

146

Returns list."""

146

Returns list."""

147

return [cell.source]

147

return [cell.source]

148

149

def render_pyout(self, output):

149

def render_pyout(self, output):

150

"""convert pyout part of a code cell to rst

150

"""convert pyout part of a code cell to rst

151

152

Returns list."""

152

Returns list."""

153

154

lines = ['Out[%s]:' % output.prompt_number, '']

154

lines = ['Out[%s]:' % output.prompt_number, '']

155

156

# output is a dictionary like object with type as a key

156

# output is a dictionary like object with type as a key

157

if 'latex' in output:

157

if 'latex' in output:

158

lines.extend(rst_directive('.. math::', output.latex))

158

lines.extend(rst_directive('.. math::', output.latex))

159

160

if 'text' in output:

160

if 'text' in output:

161

lines.extend(rst_directive('.. parsed-literal::', output.text))

161

lines.extend(rst_directive('.. parsed-literal::', output.text))

162

163

return lines

163

return lines

164

165

def render_display_data(self, output):

165

def render_display_data(self, output):

166

"""convert display data from the output of a code cell to rst.

166

"""convert display data from the output of a code cell to rst.

167

168

Returns list.

168

Returns list.

169

"""

169

"""

170

lines = []

170

lines = []

171

172

if 'png' in output:

172

if 'png' in output:

173

fname = 'nb_figure_%s.png' % self.figures_counter

173

fname = 'nb_figure_%s.png' % self.figures_counter

174

fullname = os.path.join(self.dirpath, fname)

174

fullname = os.path.join(self.dirpath, fname)

175

with open(fullname, 'w') as f:

175

with open(fullname, 'w') as f:

176

f.write(output.png.decode('base64'))

176

f.write(output.png.decode('base64'))

177

178

self.figures_counter += 1

178

self.figures_counter += 1

179

lines.append('.. image:: %s' % fname)

179

lines.append('.. image:: %s' % fname)

180

lines.append('')

180

lines.append('')

181

182

return lines

182

return lines

183

184

def render_stream(self, output):

184

def render_stream(self, output):

185

"""convert stream part of a code cell to rst

185

"""convert stream part of a code cell to rst

186

187

Returns list."""

187

Returns list."""

188

189

lines = []

189

lines = []

190

191

if 'text' in output:

191

if 'text' in output:

192

lines.extend(rst_directive('.. parsed-literal::', output.text))

192

lines.extend(rst_directive('.. parsed-literal::', output.text))

193

194

return lines

194

return lines

195

196

197

def rst2simplehtml(fname):

197

def rst2simplehtml(fname):

198

"""Convert a rst file to simplified html suitable for blogger.

198

"""Convert a rst file to simplified html suitable for blogger.

199

200

This just runs rst2html with certain parameters to produce really simple

200

This just runs rst2html with certain parameters to produce really simple

201

html and strips the document header, so the resulting file can be easily

201

html and strips the document header, so the resulting file can be easily

202

pasted into a blogger edit window.

202

pasted into a blogger edit window.

203

"""

203

"""

204

205

# This is the template for the rst2html call that produces the cleanest,

205

# This is the template for the rst2html call that produces the cleanest,

206

# simplest html I could find. This should help in making it easier to

206

# simplest html I could find. This should help in making it easier to

207

# paste into the blogspot html window, though I'm still having problems

207

# paste into the blogspot html window, though I'm still having problems

208

# with linebreaks there...

208

# with linebreaks there...

209

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

209

cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "

210

"--no-generator --no-datestamp --no-source-link "

210

"--no-generator --no-datestamp --no-source-link "

211

"--no-toc-backlinks --no-section-numbering "

211

"--no-toc-backlinks --no-section-numbering "

212

"--strip-comments ")

212

"--strip-comments ")

213

214

cmd = "%s %s" % (cmd_template, fname)

214

cmd = "%s %s" % (cmd_template, fname)

215

proc = subprocess.Popen(cmd,

215

proc = subprocess.Popen(cmd,

216

stdout=subprocess.PIPE,

216

stdout=subprocess.PIPE,

217

stderr=subprocess.PIPE,

217

stderr=subprocess.PIPE,

218

shell=True)

218

shell=True)

219

html, stderr = proc.communicate()

219

html, stderr = proc.communicate()

220

if stderr:

220

if stderr:

221

raise IOError(stderr)

221

raise IOError(stderr)

222

223

# Make an iterator so breaking out holds state. Our implementation of

223

# Make an iterator so breaking out holds state. Our implementation of

224

# searching for the html body below is basically a trivial little state

224

# searching for the html body below is basically a trivial little state

225

# machine, so we need this.

225

# machine, so we need this.

226

walker = iter(html.splitlines())

226

walker = iter(html.splitlines())

227

228

# Find start of main text, break out to then print until we find end /div.

228

# Find start of main text, break out to then print until we find end /div.

229

# This may only work if there's a real title defined so we get a 'div class'

229

# This may only work if there's a real title defined so we get a 'div class'

230

# tag, I haven't really tried.

230

# tag, I haven't really tried.

231

for line in walker:

231

for line in walker:

232

if line.startswith('<body>'):

232

if line.startswith('<body>'):

233

break

233

break

234

235

newfname = os.path.splitext(fname)[0] + '.html'

235

newfname = os.path.splitext(fname)[0] + '.html'

236

with open(newfname, 'w') as f:

236

with open(newfname, 'w') as f:

237

for line in walker:

237

for line in walker:

238

if line.startswith('</body>'):

238

if line.startswith('</body>'):

239

break

239

break

240

f.write(line)

240

f.write(line)

241

f.write('\n')

241

f.write('\n')

242

243

return newfname

243

return newfname

244

245

246

def main(fname):

246

def main(fname):

247

"""Convert a notebook to html in one step"""

247

"""Convert a notebook to html in one step"""

248

converter = ConverterRST(fname)

248

converter = ConverterRST(fname)

249

converter.render()

249

converter.render()

250

251

252

if __name__ == '__main__':

252

if __name__ == '__main__':

253

main(sys.argv[1])

253

main(sys.argv[1])

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """A really simple notebook to rst/html exporter.
             Usage
               ./nb2html.py file.ipynb
             Produces 'file.rst' and 'file.html', along with auto-generated figure files
             called nb_figure_NN.png.
             """
             import os
             import subprocess
             import sys
             from IPython.nbformat import current as nbformat
-            from IPython.utils.text import wrap_paragraphs, indent
+            from IPython.utils.text import indent
             # Cell converters
             def unknown_cell(cell):
                 """Fallback converter used when a cell type has no renderer.

                 Returns a list of rst lines: an ``Unknown cell`` warning directive
                 followed by ``repr(cell)``.
                 """
                 warning_lines = rst_directive('.. warning:: Unknown cell')
                 warning_lines.append(repr(cell))
                 return warning_lines
             def rst_directive(directive, text=''):
                 """Build the list of rst lines for a directive.

                 The result always starts with ``directive`` followed by an empty
                 string.  When ``text`` is non-empty, ``indent(text)`` and one more
                 empty string are added after them.
                 """
                 if not text:
                     return [directive, '']
                 return [directive, '', indent(text), '']
             # Converters for parts of a cell.
             class ConversionException(Exception):
                 """Exception type raised by the converters in this module."""
             class Converter(object):
                 """Base class for notebook converters.

                 A subclass supplies ``extension`` and the ``render_<type>`` methods.
                 This class reads the notebook, hands every cell of the first worksheet
                 to the matching renderer, and writes the joined text to a file named
                 after the notebook.
                 """
                 # Encoding used by save() when the caller does not pass one.
                 default_encoding = 'utf-8'

                 def __init__(self, fname):
                     """Store the notebook path and the directory part of that path."""
                     self.fname = fname
                     self.dirpath = os.path.dirname(fname)

                 @property
                 def extension(self):
                     """Output file extension without the dot; subclasses override it."""
                     raise ConversionException("""extension must be defined in Converter
                             subclass""")

                 def dispatch(self, cell_type):
                     """return cell_type dependent render method,  for example render_code

                     Falls back to ``unknown_cell`` when no ``render_<cell_type>``
                     attribute exists on this converter.
                     """
                     return getattr(self, 'render_' + cell_type, unknown_cell)

                 def convert(self):
                     """Render all cells of the first worksheet into one string.

                     Each renderer returns a list of lines; an empty line is added
                     after every cell and everything is joined with newlines.
                     """
                     lines = []
                     for cell in self.nb.worksheets[0].cells:
                         conv_fn = self.dispatch(cell.cell_type)
                         lines.extend(conv_fn(cell))
                         lines.append('')
                     return '\n'.join(lines)

                 def render(self):
                     "read, convert, and save self.fname"
                     self.read()
                     self.output = self.convert()
                     return self.save()

                 def read(self):
                     "read and parse notebook into NotebookNode called self.nb"
                     with open(self.fname) as f:
                         self.nb = nbformat.read(f, 'json')

                 def save(self, fname=None, encoding=None):
                     """Write ``self.output`` to disk and return the file name used.

                     fname defaults to the notebook path with its extension replaced
                     by ``self.extension``; encoding defaults to
                     ``self.default_encoding``.
                     """
                     if fname is None:
                         fname = os.path.splitext(self.fname)[0] + '.' + self.extension
                     if encoding is None:
                         encoding = self.default_encoding
                     # The text is encoded by hand, so the handle has to be binary:
                     # a text-mode handle rejects bytes on Python 3 and rewrites
                     # newlines on Windows.
                     with open(fname, 'wb') as f:
                         f.write(self.output.encode(encoding))
                     return fname

                 # The render_* methods below are the hooks a concrete converter
                 # has to implement; dispatch() selects one by cell/output type.

                 def render_heading(self, cell):
                     """Render a heading cell; must be implemented by subclasses."""
                     raise NotImplementedError

                 def render_code(self, cell):
                     """Render a code cell; must be implemented by subclasses."""
                     raise NotImplementedError

                 def render_markdown(self, cell):
                     """Render a markdown cell; must be implemented by subclasses."""
                     raise NotImplementedError

                 def render_pyout(self, cell):
                     """Render a pyout output; must be implemented by subclasses."""
                     raise NotImplementedError

                 def render_display_data(self, cell):
                     """Render a display_data output; must be implemented by subclasses."""
                     raise NotImplementedError

                 def render_stream(self, cell):
                     """Render a stream output; must be implemented by subclasses."""
                     raise NotImplementedError
             class ConverterRST(Converter):
                 """Converter that renders notebook cells as reStructuredText lines."""
                 extension = 'rst'
                 # Index used to name the next figure file written by
                 # render_display_data (nb_figure_<counter>.png).
                 figures_counter = 0

                 def render_heading(self, cell):
                     """convert a heading cell to rst

                     The heading text is underlined with the marker character that
                     corresponds to ``cell.level`` (1 to 6).

                     Returns list."""
                     heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
                     marker = heading_level[cell.level]
                     return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

                 def render_code(self, cell):
                     """Convert a code cell to rst

                     Emits the input prompt, the input as a ``code`` directive, then
                     the rendering of every output.  A cell with empty input yields
                     no lines at all.

                     Returns list."""
                     if not cell.input:
                         return []

                     lines = ['In[%s]:' % cell.prompt_number, '']
                     lines.extend(rst_directive('.. code:: python', cell.input))

                     for output in cell.outputs:
                         conv_fn = self.dispatch(output.output_type)
                         lines.extend(conv_fn(output))

                     return lines

                 def render_markdown(self, cell):
                     """convert a markdown cell to rst

                     The source is passed through unchanged.

                     Returns list."""
                     return [cell.source]

                 def render_plaintext(self, cell):
                     """convert plain text to rst

                     The source is passed through unchanged.

                     Returns list."""
                     return [cell.source]

                 def render_pyout(self, output):
                     """convert pyout part of a code cell to rst

                     Returns list."""
                     lines = ['Out[%s]:' % output.prompt_number, '']

                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         lines.extend(rst_directive('.. math::', output.latex))

                     if 'text' in output:
                         lines.extend(rst_directive('.. parsed-literal::', output.text))

                     return lines

                 def render_display_data(self, output):
                     """convert display data from the output of a code cell to rst.

                     A png payload is base64-decoded and written to
                     ``nb_figure_<counter>.png`` inside ``self.dirpath``; the returned
                     lines reference that file with an ``image`` directive.

                     Returns list.
                     """
                     lines = []

                     if 'png' in output:
                         # Imported here so the block does not depend on a new
                         # module-level import.
                         import base64

                         fname = 'nb_figure_%s.png' % self.figures_counter
                         fullname = os.path.join(self.dirpath, fname)
                         # PNG data is binary: a text-mode handle would rewrite
                         # newline bytes on Windows and rejects bytes on Python 3.
                         with open(fullname, 'wb') as f:
                             f.write(base64.b64decode(output.png))

                         self.figures_counter += 1
                         lines.append('.. image:: %s' % fname)
                         lines.append('')

                     return lines

                 def render_stream(self, output):
                     """convert stream part of a code cell to rst

                     Returns list."""
                     lines = []

                     if 'text' in output:
                         lines.extend(rst_directive('.. parsed-literal::', output.text))

                     return lines
             def rst2simplehtml(fname):
                 """Convert a rst file to simplified html suitable for blogger.

                 This just runs rst2html with certain parameters to produce really simple
                 html and strips the document header, so the resulting file can be easily
                 pasted into a blogger edit window.

                 Only the lines between the ``<body>`` and ``</body>`` lines of the
                 rst2html output are kept.  They are written to a file named like
                 ``fname`` but with an ``.html`` extension, and that name is returned.

                 Raises IOError when rst2html writes anything to stderr.
                 """
                 # These are the rst2html options that produce the cleanest,
                 # simplest html I could find.  This should help in making it easier to
                 # paste into the blogspot html window, though I'm still having problems
                 # with linebreaks there...
                 # The command is an argument list run without a shell, so a file
                 # name containing spaces or shell metacharacters is passed through
                 # as a single, literal argument.
                 cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
                        '--no-generator', '--no-datestamp', '--no-source-link',
                        '--no-toc-backlinks', '--no-section-numbering',
                        '--strip-comments', fname]
                 proc = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                 html, stderr = proc.communicate()
                 if stderr:
                     raise IOError(stderr)

                 # Make an iterator so breaking out holds state.  Our implementation of
                 # searching for the html body below is basically a trivial little state
                 # machine, so we need this.
                 # communicate() returns bytes, so the markers are bytes literals and
                 # the result is written in binary mode without any re-encoding.
                 walker = iter(html.splitlines())

                 # Skip everything up to and including the opening <body> line.
                 for line in walker:
                     if line.startswith(b'<body>'):
                         break

                 newfname = os.path.splitext(fname)[0] + '.html'
                 with open(newfname, 'wb') as f:
                     # Copy lines until the closing </body> line is reached.
                     for line in walker:
                         if line.startswith(b'</body>'):
                             break
                         f.write(line)
                         f.write(b'\n')

                 return newfname
             def main(fname):
                 """Render the notebook at ``fname`` to an rst file.

                 Builds a ``ConverterRST`` for the notebook and runs its ``render``
                 step (read, convert, save).
                 """
                 ConverterRST(fname).render()
             if __name__ == '__main__':
                 # Script entry point: the first command-line argument is the
                 # path of the notebook to convert.
                 notebook_path = sys.argv[1]
                 main(notebook_path)