upstream/ipython Commit - r6672:925d7baf

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

"""Convert IPython notebooks to other formats, such as ReST, and HTML.

2

"""Convert IPython notebooks to other formats, such as ReST, and HTML.

3

4

Example:

4

Example:

5

./nbconvert.py --format html file.ipynb

5

./nbconvert.py --format html file.ipynb

6

7

Produces 'file.rst' and 'file.html', along with auto-generated figure files

7

Produces 'file.rst' and 'file.html', along with auto-generated figure files

8

called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,

8

called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,

9

use '--format quick-html' which will do ipynb -> html, but won't look as

9

use '--format quick-html' which will do ipynb -> html, but won't look as

10

pretty.

10

pretty.

11

"""

11

"""

12

from __future__ import print_function

12

from __future__ import print_function

13

14

import codecs

14

import os

15

import os

15

import pprint

16

import pprint

16

import re

17

import re

17

import subprocess

18

import subprocess

18

import sys

19

import sys

19

20

from IPython.external import argparse

21

from IPython.external import argparse

21

from IPython.nbformat import current as nbformat

22

from IPython.nbformat import current as nbformat

22

from IPython.utils.text import indent

23

from IPython.utils.text import indent

23

from decorators import DocInherit

24

from decorators import DocInherit

24

25

def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        ``src`` with every ``ESC [ ... m`` sequence removed.
    """
    # Match any SGR sequence: ESC, '[', zero or more digits/semicolons, 'm'.
    # The earlier pattern only covered 'ESC[0m' and 'ESC[d;ddm', leaving codes
    # such as 'ESC[31m' or 'ESC[1;31;40m' in the output.
    return re.sub(r'\033\[[0-9;]*m', '', src)

37

38

# Pandoc-dependent code

39

# Pandoc-dependent code

39

def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # The pipe carries bytes: encode text input explicitly instead of relying
    # on an implicit ASCII conversion that fails on non-ASCII markdown.
    if not isinstance(src, bytes):
        src = src.encode('utf-8')
    out, err = p.communicate(src)
    # stderr is piped above, so `err` really holds pandoc's messages here.
    if err:
        print(err.decode('utf-8', 'replace'), file=sys.stderr)
    return out.decode('utf-8')

63

64

# Cell converters

65

# Cell converters

65

66

67

def rst_directive(directive, text=''):
    """Return the lines of an RST directive with an optional indented body.

    Parameters
    ----------
    directive : string
      The directive line itself, e.g. '.. code:: python'.
    text : string, optional
      Body of the directive; it is passed through `indent` when non-empty.

    Returns
    -------
    out : list
      The directive, a blank line and, if `text` is given, the indented
      body followed by another blank line.
    """
    out = [directive, '']
    if text:
        out.extend([indent(text), ''])
    return out

72

73

# Converters for parts of a cell.

74

# Converters for parts of a cell.

74

75

76

class ConversionException(Exception):
    """Exception type reserved for errors raised by the converters."""

78

79

80

class Converter(object):
    """Base class for notebook converters.

    Subclasses set `extension` and implement the ``render_*`` methods (and
    `_img_lines`) for their output format; `render` drives the whole
    read -> convert -> save sequence.
    """
    default_encoding = 'utf-8'
    extension = ''
    figures_counter = 0
    infile = ''
    infile_dir = ''
    infile_root = ''
    files_dir = ''

    def __init__(self, infile):
        """Record the input file and make sure its '<root>_files' dir exists.

        Parameters
        ----------
        infile : string
          Path of the notebook file to convert.
        """
        self.infile = infile
        self.infile_dir = os.path.dirname(infile)
        infile_root = os.path.splitext(infile)[0]
        files_dir = infile_root + '_files'
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to `render_unknown` when no ``render_<cell_type>``
        attribute exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def convert(self):
        """Render every cell of every worksheet of self.nb.

        Returns the optional header, the rendered cells (each followed by an
        empty line) and the optional footer, joined with newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                lines.extend(conv_fn(cell))
                lines.append('')
        lines.extend(self.optional_footer())
        return '\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, infile=None, encoding=None):
        """Write self.output to disk and return the name of the file written.

        Parameters
        ----------
        infile : string, optional
          Output file name. Defaults to the input file name with its
          extension replaced by `self.extension`.
        encoding : string, optional
          Encoding for the output; defaults to `self.default_encoding`.
        """
        if infile is None:
            infile = os.path.splitext(self.infile)[0] + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        # The payload is already encoded, so the file must be binary.
        with open(infile, 'wb') as f:
            f.write(self.output.encode(encoding))
        return infile

    def optional_header(self):
        """Return the list of lines to emit before the cells (none here)."""
        return []

    def optional_footer(self):
        """Return the list of lines to emit after the cells (none here)."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Parameters
        ----------
        data : string
          Figure payload: base64 text for 'png', 'jpg' and 'pdf', plain
          text for anything else (e.g. SVG).
        fmt : string
          Figure format, also used as the file extension.

        Returns the path of the file written inside `self.files_dir`.
        """
        import base64

        # Only the base name goes into the figure name: `infile_root` may
        # carry a directory part, which would otherwise be appended again
        # below `files_dir` (or replace it entirely for absolute paths).
        figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Returns list.
        """
        lines = []

        for fmt in ['png', 'svg', 'jpg', 'pdf']:
            if fmt in output:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))

        return lines

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_plaintext(self, cell):
        """convert plain text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        raise NotImplementedError

200

238

201

239

202

class ConverterRST(Converter):
    """Converts a notebook to a reStructuredText (.rst) file."""
    extension = 'rst'
    # Underline character used for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        marker = self.heading_level[cell.level]
        return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        lines = ['In[%s]:' % cell.prompt_number, '']
        lines.extend(rst_directive('.. code:: python', cell.input))

        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [cell.source]

    @DocInherit
    def render_plaintext(self, cell):
        return [cell.source]

    @DocInherit
    def render_pyout(self, output):
        lines = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            lines.extend(rst_directive('.. math::', output.latex))

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        # Use the parameter: the previous body referenced an undefined
        # `figfile` name and raised NameError for every figure.
        return ['.. image:: %s' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(rst_directive('.. parsed-literal::', output.text))

        return lines

    @DocInherit
    def render_unknown(self, cell):
        return rst_directive('.. warning:: Unknown cell') + [repr(cell)]

269

300

270

class ConverterQuickHTML(Converter):
    """Converts a notebook directly to a simple HTML file.

    NOTE(review): cell sources and outputs are inserted into the markup
    without HTML escaping - confirm whether that is acceptable for the
    notebooks this is used on.
    """
    extension = 'html'

    def optional_header(self):
        """Return the lines that open the HTML document."""
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        """Return the lines that close the HTML document."""
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        marker = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        lines = ['<table>']
        lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>'
                     % cell.prompt_number)
        lines.append("<br>\n".join(cell.input.splitlines()))
        lines.append('</tt></td></tr>')

        for output in cell.outputs:
            lines.append('<tr><td></td><td>')
            conv_fn = self.dispatch(output.output_type)
            lines.extend(conv_fn(output))
            lines.append('</td></tr>')

        lines.append('</table>')
        return lines

    @DocInherit
    def render_markdown(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_plaintext(self, cell):
        return ["<pre>"+cell.source+"</pre>"]

    @DocInherit
    def render_pyout(self, output):
        lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>'
                 % output.prompt_number, '<td>']

        # output is a dictionary like object with type as a key
        # indent() returns one string, so it is appended as a single entry;
        # extend() would split it into one list item per character.
        if 'latex' in output:
            lines.append("<pre>")
            lines.append(indent(output.latex))
            lines.append("</pre>")

        if 'text' in output:
            lines.append("<pre>")
            lines.append(indent(output.text))
            lines.append("</pre>")

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '']

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.append(output.text)

        return lines

362

381

363

382

364

class ConverterLaTeX(Converter):
    """Converts a notebook to a .tex file suitable for pdflatex.

    Note: this converter *needs*:

    - `pandoc`: for all conversion of markdown cells.  If your notebook only
      has Raw cells, pandoc will not be needed.

    - `inkscape`: if your notebook has SVG figures.  These need to be
      converted to PDF before inclusion in the TeX file, as LaTeX doesn't
      understand SVG natively.

    You will in general obtain much better final PDF results if you configure
    the matplotlib backend to create SVG output with

    %config InlineBackend.figure_format = 'svg'

    (or set the equivalent flag at startup or in your configuration profile).
    """
    extension = 'tex'
    # Sectioning command used for each heading level.
    heading_marker = {1: r'\section',
                      2: r'\subsection',
                      3: r'\subsubsection',
                      4: r'\paragraph',
                      5: r'\subparagraph',
                      6: r'\subparagraph'}

    def env(self, environment, lines):
        """Return list of environment lines for input lines

        Parameters
        ----------
        environment : string
          Name of the environment to bracket with begin/end.

        lines : string or list
          Content of the environment.  A string is inserted as one entry,
          a list is spliced in item by item.
        """
        out = [r'\begin{%s}' % environment]
        # (str, type(u'')) covers byte and unicode strings on Python 2 and
        # plain str on Python 3, without relying on `basestring`.
        if isinstance(lines, (str, type(u''))):
            out.append(lines)
        else:  # list
            out.extend(lines)
        out.append(r'\end{%s}' % environment)
        return out

    @DocInherit
    def render_heading(self, cell):
        marker = self.heading_marker[cell.level]
        return ['%s{%s}\n\n' % (marker, cell.source)]

    @DocInherit
    def render_code(self, cell):
        if not cell.input:
            return []

        # Cell codes first carry input code, we use lstlisting for that
        lines = [r'\begin{codecell}']

        lines.extend(self.env('codeinput',
                              self.env('lstlisting', cell.input)))

        outlines = []
        for output in cell.outputs:
            conv_fn = self.dispatch(output.output_type)
            outlines.extend(conv_fn(output))

        # And then output of many possible types; use a frame for all of it.
        if outlines:
            lines.extend(self.env('codeoutput', outlines))

        lines.append(r'\end{codecell}')

        return lines

    @DocInherit
    def _img_lines(self, img_file):
        return self.env('center',
                        [r'\includegraphics[width=3in]{%s}' % img_file,
                         r'\par'])

    def _svg_lines(self, img_file):
        """Convert an SVG figure to PDF with inkscape and include the PDF."""
        base_file = os.path.splitext(img_file)[0]
        pdf_file = base_file + '.pdf'
        subprocess.check_call(['inkscape', '--export-pdf=%s' % pdf_file,
                               img_file])
        return self._img_lines(pdf_file)

    @DocInherit
    def render_stream(self, output):
        lines = []

        if 'text' in output:
            lines.extend(self.env('verbatim', output.text.strip()))

        return lines

    @DocInherit
    def render_markdown(self, cell):
        return [markdown2latex(cell['source'])]

    @DocInherit
    def render_pyout(self, output):
        lines = []

        # output is a dictionary like object with type as a key
        if 'latex' in output:
            # Append the snippet as one entry; extend() on a string would
            # add it character by character.
            lines.append(output.latex)

        if 'text' in output:
            lines.extend(self.env('verbatim', output.text))

        return lines

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        return self.env('traceback',
                        self.env('verbatim',
                                 remove_ansi('\n'.join(output.traceback))))

    @DocInherit
    def render_unknown(self, cell):
        return self.env('verbatim', pprint.pformat(cell))

467

505

468

506

469

def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : string
      Path of the rst file to convert.

    Returns
    -------
    newfname : string
      Path of the html file written (`infile` with an '.html' extension).

    Raises
    ------
    IOError
      If rst2html writes anything to stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest html
    # I could find.  This should help in making it easier to paste into the
    # blogspot html window, though I'm still having problems with linebreaks
    # there...
    # The command is an argument list run without a shell, so file names
    # containing spaces or shell metacharacters are passed through verbatim.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div
    # class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname

516

554

517

known_formats = "rst (default), html, quick-html, latex"


def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested output format.

    Parameters
    ----------
    infile : string
      Path of the notebook file to convert.
    format : string, optional
      One of 'rst', 'html', 'quick-html' or 'latex'.

    Raises
    ------
    SystemExit
      If `format` is not one of the known formats.
    """
    # XXX: this is just quick and dirty for now. When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all else, as well as in the help
    if format == 'rst':
        ConverterRST(infile).render()
    elif format == 'html':
        # Currently, conversion to html is a 2 step process, nb->rst->html
        rstfname = ConverterRST(infile).render()
        rst2simplehtml(rstfname)
    elif format == 'quick-html':
        ConverterQuickHTML(infile).render()
    elif format == 'latex':
        ConverterLaTeX(infile).render()
    else:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)

541

579

542

580

543

581

544

if __name__ == '__main__':
    # Command-line entry point: parse the notebook file name and the output
    # format, then hand both to main().
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    # Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    parser.add_argument('-f', '--format', default='rst',
                        help='Output format. Supported formats: \n' +
                        known_formats)
    args = parser.parse_args()
    # nargs=1 yields a one-element list, hence the [0].
    main(infile=args.infile[0], format=args.format)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """Convert IPython notebooks to other formats, such as ReST, and HTML.
             Example:
               ./nbconvert.py --format html file.ipynb
             Produces 'file.rst' and 'file.html', along with auto-generated figure files
             called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
             use '--format quick-html' which will do ipynb -> html, but won't look as
             pretty.
             """
             from __future__ import print_function
+            import codecs
             import os
             import pprint
             import re
             import subprocess
             import sys
             from IPython.external import argparse
             from IPython.nbformat import current as nbformat
             from IPython.utils.text import indent
             from decorators import DocInherit
             def remove_ansi(src):
                 """Strip all ANSI color escape sequences from input string.

                 Every sequence of the form ESC '[' <params> 'm' is removed, where
                 <params> is any (possibly empty) run of digits and ';' separators,
                 e.g. ESC[0m, ESC[m, ESC[31m, ESC[0;31m or ESC[1;31;40m.

                 Parameters
                 ----------
                 src : string

                 Returns
                 -------
                 string
                   `src` with the color escape sequences removed.
                 """
                 # [0-9;]* rather than a fixed 'd;dd' shape, so that single-parameter,
                 # multi-parameter and parameter-less sequences are all stripped.
                 return re.sub(r'\033\[[0-9;]*m', '', src)
             # Pandoc-dependent code
             def markdown2latex(src):
                 """Convert a markdown string to LaTeX via pandoc.

                 This function will raise an error if pandoc is not installed.

                 Any error messages generated by pandoc are printed to stderr.

                 Parameters
                 ----------
                 src : string
                   Input string, assumed to be valid markdown.

                 Returns
                 -------
                 out : string
                   Output as returned by pandoc.
                 """
                 # stderr must be piped as well, otherwise communicate() always returns
                 # None for it and the error report below can never run.
                 p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE)
                 out, err = p.communicate(src)
                 if err:
                     print(err, file=sys.stderr)
                 return out
             # Cell converters
             def rst_directive(directive, text=''):
                 """Return the list of lines for a reST directive.

                 The list starts with `directive` and an empty line. When `text` is
                 non-empty it is followed by the indented text and another empty line.
                 """
                 if not text:
                     return [directive, '']
                 return [directive, '', indent(text), '']
             # Converters for parts of a cell.
             class ConversionException(Exception):
                 """Exception type for notebook conversion errors."""
             class Converter(object):
                 default_encoding = 'utf-8'
+                extension = str()
                 figures_counter = 0
+                infile = str()
+                infile_dir = str()
+                infile_root = str()
+                files_dir = str()
                 def __init__(self, infile):
                     self.infile = infile
-                    self.dirpath = os.path.dirname(infile)
+                    self.infile_dir = os.path.dirname(infile)
+                    infile_root = os.path.splitext(infile)[0]
-                @property
+                    files_dir = infile_root + '_files'
-                def extension(self):
+                    if not os.path.isdir(files_dir):
-                    raise ConversionException("""extension must be defined in Converter
+                        os.mkdir(files_dir)
-                            subclass""")
+                    self.infile_root = infile_root
+                    self.files_dir = files_dir
                 def dispatch(self, cell_type):
                     """return cell_type dependent render method,  for example render_code
                     """
                     return getattr(self, 'render_' + cell_type, self.render_unknown)
                 def convert(self):
                     lines = []
                     lines.extend(self.optional_header())
                     for worksheet in self.nb.worksheets:
                         for cell in worksheet.cells:
                             conv_fn = self.dispatch(cell.cell_type)
                             lines.extend(conv_fn(cell))
                             lines.append('')
                     lines.extend(self.optional_footer())
                     return '\n'.join(lines)
                 def render(self):
                     "read, convert, and save self.infile"
                     self.read()
                     self.output = self.convert()
                     return self.save()
                 def read(self):
                     "read and parse notebook into NotebookNode called self.nb"
                     with open(self.infile) as f:
                         self.nb = nbformat.read(f, 'json')
                 def save(self, infile=None, encoding=None):
                     """Write self.output to disk and return the name of the file written.

                     Parameters
                     ----------
                     infile : string, optional
                       Name of the file to write. Despite its name this is the *output*
                       file; by default it is self.infile with its extension replaced
                       by self.extension.
                     encoding : string, optional
                       Encoding used for the output; defaults to self.default_encoding.

                     Returns
                     -------
                     infile : string
                       Name of the file that was written.
                     """
                     if infile is None:
                         infile = os.path.splitext(self.infile)[0] + '.' + self.extension
                     if encoding is None:
                         encoding = self.default_encoding
                     # The text is encoded by hand below, so the file must be opened in
                     # binary mode: the bytes are written exactly as encoded, with no
                     # platform newline translation applied on top.
                     with open(infile, 'wb') as f:
                         f.write(self.output.encode(encoding))
                     return infile
                 def optional_header(self):
                     return []
                 def optional_footer(self):
                     return []
-                def _new_figure(self, data, format):
+                def _new_figure(self, data, fmt):
-                    basename = self.infile.replace('.ipynb', '')
+                    """Create a new figure file in the given format.
-                    figname = '%s_fig_%02i.%s' % (basename, self.figures_counter, format)
+                    Returns a path relative to the input file.
+                    """
+                    figname = '%s_fig_%02i.%s' % (self.infile_root,
+                                                  self.figures_counter, fmt)
                     self.figures_counter += 1
-                    fullname = os.path.join(self.dirpath, figname)
+                    fullname = os.path.join(self.files_dir, figname)
-                    with open(fullname, 'w') as f:
-                        f.write(data.decode('base64'))
+                    # Binary files are base64-encoded, SVG is already XML
-                    return figname
+                    if fmt in ('png', 'jpg', 'pdf'):
+                        data = data.decode('base64')
+                        fopen = lambda fname: open(fname, 'wb')
+                    else:
+                        fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
+                    with fopen(fullname) as f:
+                        f.write(data)
+                    return fullname
                 def render_heading(self, cell):
                     """convert a heading cell
                     Returns list."""
                     raise NotImplementedError
                 def render_code(self, cell):
                     """Convert a code cell
                     Returns list."""
                     raise NotImplementedError
                 def render_markdown(self, cell):
                     """convert a markdown cell
                     Returns list."""
                     raise NotImplementedError
                 def render_pyout(self, output):
                     """convert pyout part of a code cell
                     Returns list."""
                     raise NotImplementedError
                 def render_pyerr(self, output):
                     """convert pyerr part of a code cell
                     Returns list."""
                     raise NotImplementedError
+                def _img_lines(self, img_file):
+                    """Return list of lines to include an image file."""
+                    # Note: subclasses may choose to implement format-specific _FMT_lines
+                    # methods if they so choose (FMT in {png, svg, jpg, pdf}).
+                    raise NotImplementedError
                 def render_display_data(self, output):
                     """convert display data from the output of a code cell
                     Returns list.
                     """
-                    raise NotImplementedError
+                    lines = []
+                    for fmt in ['png', 'svg', 'jpg', 'pdf']:
+                        if fmt in output:
+                            img_file = self._new_figure(output[fmt], fmt)
+                            # Subclasses can have format-specific render functions (e.g.,
+                            # latex has to auto-convert all SVG to PDF first).
+                            lines_fun = getattr(self, '_%s_lines' % fmt, None)
+                            if not lines_fun:
+                                lines_fun = self._img_lines
+                            lines.extend(lines_fun(img_file))
+                    return lines
                 def render_stream(self, cell):
                     """convert stream part of a code cell
                     Returns list."""
                     raise NotImplementedError
                 def render_plaintext(self, cell):
                     """convert plain text
                     Returns list."""
                     raise NotImplementedError
                 def render_unknown(self, cell):
                     """Render cells of unkown type
                     Returns list."""
                     raise NotImplementedError
             class ConverterRST(Converter):
                 extension = 'rst'
                 heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}
                 @DocInherit
                 def render_heading(self, cell):
                     marker = self.heading_level[cell.level]
                     return ['{0}\n{1}\n'.format(cell.source, marker * len(cell.source))]
                 @DocInherit
                 def render_code(self, cell):
                     if not cell.input:
                         return []
                     lines = ['In[%s]:' % cell.prompt_number, '']
                     lines.extend(rst_directive('.. code:: python', cell.input))
                     for output in cell.outputs:
                         conv_fn = self.dispatch(output.output_type)
                         lines.extend(conv_fn(output))
                     return lines
                 @DocInherit
                 def render_markdown(self, cell):
                     return [cell.source]
                 @DocInherit
                 def render_plaintext(self, cell):
                     return [cell.source]
                 @DocInherit
                 def render_pyout(self, output):
                     lines = ['Out[%s]:' % output.prompt_number, '']
                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         lines.extend(rst_directive('.. math::', output.latex))
                     if 'text' in output:
                         lines.extend(rst_directive('.. parsed-literal::', output.text))
                     return lines
                 @DocInherit
-                def render_display_data(self, output):
+                def _img_lines(self, img_file):
-                    lines = []
+                    return ['.. image:: %s' % figfile, '']
-                    if 'png' in output:
-                        figfile = self._new_figure(output.png, 'png')
-                        lines.append('.. image:: %s' % figfile)
-                        lines.append('')
-                    return lines
                 @DocInherit
                 def render_stream(self, output):
                     lines = []
                     if 'text' in output:
                         lines.extend(rst_directive('.. parsed-literal::', output.text))
                     return lines
                 @DocInherit
                 def render_unknown(self, cell):
                     return rst_directive('.. warning:: Unknown cell') + [repr(cell)]
             class ConverterQuickHTML(Converter):
                 extension = 'html'
                 def optional_header(self):
                     # XXX: inject the IPython standard CSS into here
                     s = """<html>
                     <head>
                     </head>
                     <body>
                     """
                     return s.splitlines()
                 def optional_footer(self):
                     s = """</body>
                     </html>
                     """
                     return s.splitlines()
                 @DocInherit
                 def render_heading(self, cell):
                     marker = cell.level
                     return ['<h{1}>\n  {0}\n</h{1}>'.format(cell.source, marker)]
                 @DocInherit
                 def render_code(self, cell):
                     if not cell.input:
                         return []
                     lines = ['<table>']
                     lines.append('<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number)
                     lines.append("<br>\n".join(cell.input.splitlines()))
                     lines.append('</tt></td></tr>')
                     for output in cell.outputs:
                         lines.append('<tr><td></td><td>')
                         conv_fn = self.dispatch(output.output_type)
                         lines.extend(conv_fn(output))
                         lines.append('</td></tr>')
                     lines.append('</table>')
                     return lines
                 @DocInherit
                 def render_markdown(self, cell):
                     return ["<pre>"+cell.source+"</pre>"]
                 @DocInherit
                 def render_plaintext(self, cell):
                     return ["<pre>"+cell.source+"</pre>"]
                 @DocInherit
                 def render_pyout(self, output):
                     lines = ['<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' % output.prompt_number, '<td>']
                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         lines.append("<pre>")
                         # indent() is given a string here, so its result is added as a
                         # single entry; extend() would add one entry per character.
                         lines.append(indent(output.latex))
                         lines.append("</pre>")
                     if 'text' in output:
                         lines.append("<pre>")
                         lines.append(indent(output.text))
                         lines.append("</pre>")
                     return lines
                 @DocInherit
-                def render_display_data(self, output):
+                def _img_lines(self, img_file):
-                    lines = []
+                    return ['<img src="%s">' % img_file, '']
-                    if 'png' in output:
-                        infile = 'nb_figure_%s.png' % self.figures_counter
-                        fullname = os.path.join(self.dirpath, infile)
-                        with open(fullname, 'w') as f:
-                            f.write(output.png.decode('base64'))
-                        self.figures_counter += 1
-                        lines.append('<img src="%s">' % infile)
-                        lines.append('')
-                    return lines
                 @DocInherit
                 def render_stream(self, output):
                     lines = []
                     if 'text' in output:
                         lines.append(output.text)
                     return lines
             class ConverterLaTeX(Converter):
+                """Converts a notebook to a .tex file suitable for pdflatex.
+                Note: this converter *needs*:
+                - `pandoc`: for all conversion of markdown cells.  If your notebook only
+                   has Raw cells, pandoc will not be needed.
+                -  `inkscape`: if your notebook has SVG figures.  These need to be
+                   converted to PDF before inclusion in the TeX file, as LaTeX doesn't
+                   understand SVG natively.
+                You will in general obtain much better final PDF results if you configure
+                the matplotlib backend to create SVG output with
+                %config InlineBackend.figure_format = 'svg'
+                (or set the equivalent flag at startup or in your configuration profile).
+                """
                 extension = 'tex'
+                heading_marker = {1: r'\section',
+: r'\subsection',
+: r'\subsubsection',
+: r'\paragraph',
+: r'\subparagraph',
+: r'\subparagraph'}
                 def env(self, environment, lines):
                     """Return list of environment lines for input lines
                     Parameters
                     ----------
                     env : string
                       Name of the environment to bracket with begin/end.
                     lines: """
                     out = [r'\begin{%s}' % environment]
                     if isinstance(lines, basestring):
                         out.append(lines)
                     else:  # list
                         out.extend(lines)
                     out.append(r'\end{%s}' % environment)
                     return out
                 @DocInherit
                 def render_heading(self, cell):
-                    heading_marker = {1: r'\section',
+                    marker = self.heading_marker[cell.level]
-: r'\subsection',
-: r'\subsubsection',
-: r'\paragraph',
-: r'\subparagraph',
-: r'\subparagraph'}
-                    marker = heading_marker[cell.level]
                     return ['%s{%s}\n\n' % (marker, cell.source) ]
                 @DocInherit
                 def render_code(self, cell):
                     if not cell.input:
                         return []
                     # Cell codes first carry input code, we use lstlisting for that
                     lines = [r'\begin{codecell}']
                     lines.extend(self.env('codeinput',
                                           self.env('lstlisting', cell.input)))
                     outlines = []
                     for output in cell.outputs:
                         conv_fn = self.dispatch(output.output_type)
                         outlines.extend(conv_fn(output))
                     # And then output of many possible types; use a frame for all of it.
                     if outlines:
                         lines.extend(self.env('codeoutput', outlines))
                     lines.append(r'\end{codecell}')
                     return lines
-                @DocInherit
-                def render_display_data(self, output):
-                    lines = []
-                    if 'png' in output:
-                        figfile = self._new_figure(output.png, 'png')
-                        lines.extend(self.env('center',
+                @DocInherit
-                                              [r'\includegraphics[width=3in]{%s}' % figfile,
+                def _img_lines(self, img_file):
-                                               r'\par']))
+                    return self.env('center',
-                    return lines
+                            [r'\includegraphics[width=3in]{%s}' % img_file, r'\par'])
+                def _svg_lines(self, img_file):
+                    base_file = os.path.splitext(img_file)[0]
+                    pdf_file = base_file + '.pdf'
+                    subprocess.check_call(['inkscape', '--export-pdf=%s' % pdf_file,
+                                           img_file])
+                    return self._img_lines(pdf_file)
                 @DocInherit
                 def render_stream(self, output):
                     lines = []
                     if 'text' in output:
                         lines.extend(self.env('verbatim', output.text.strip()))
                     return lines
                 @DocInherit
                 def render_markdown(self, cell):
                     return [markdown2latex(cell['source'])]
                 @DocInherit
                 def render_pyout(self, output):
                     lines = []
                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         # output.latex is used as a single string elsewhere in this file
                         # (it is passed to rst_directive), so it is appended as one
                         # entry; extend() would add one entry per character.
                         lines.append(output.latex)
                     if 'text' in output:
                         lines.extend(self.env('verbatim', output.text))
                     return lines
                 @DocInherit
                 def render_pyerr(self, output):
                     # Note: a traceback is a *list* of frames.
                     return self.env('traceback',
                                     self.env('verbatim',
                                              remove_ansi('\n'.join(output.traceback))))
                 @DocInherit
                 def render_unknown(self, cell):
                     return self.env('verbatim', pprint.pformat(cell))
             def rst2simplehtml(infile):
                 """Convert a rst file to simplified html suitable for blogger.

                 This just runs rst2html with certain parameters to produce really simple
                 html and strips the document header, so the resulting file can be easily
                 pasted into a blogger edit window.

                 Parameters
                 ----------
                 infile : string
                   Name of the rst file to convert.

                 Returns
                 -------
                 newfname : string
                   Name of the html file written: `infile` with its extension replaced
                   by '.html'.

                 Raises
                 ------
                 IOError
                   If rst2html writes anything to its standard error.
                 """
                 # These are the rst2html options that produce the cleanest, simplest
                 # html I could find.  This should help in making it easier to paste
                 # into the blogspot html window, though I'm still having problems
                 # with linebreaks there...
                 #
                 # The command is passed as an argument list and run without a shell,
                 # so a file name containing spaces or shell metacharacters reaches
                 # rst2html as one literal argument.
                 cmd = ['rst2html',
                        '--link-stylesheet', '--no-xml-declaration',
                        '--no-generator', '--no-datestamp', '--no-source-link',
                        '--no-toc-backlinks', '--no-section-numbering',
                        '--strip-comments',
                        infile]
                 proc = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                 html, stderr = proc.communicate()
                 if stderr:
                     raise IOError(stderr)

                 # Make an iterator so breaking out holds state.  Our implementation of
                 # searching for the html body below is basically a trivial little state
                 # machine, so we need this.
                 walker = iter(html.splitlines())

                 # Find start of main text, break out to then print until we find end /div.
                 # This may only work if there's a real title defined so we get a 'div class'
                 # tag, I haven't really tried.
                 for line in walker:
                     if line.startswith('<body>'):
                         break

                 newfname = os.path.splitext(infile)[0] + '.html'
                 with open(newfname, 'w') as f:
                     # The walker resumes right after the <body> line found above.
                     for line in walker:
                         if line.startswith('</body>'):
                             break
                         f.write(line)
                         f.write('\n')

                 return newfname
             known_formats = "rst (default), html, quick-html, latex"
             def main(infile, format='rst'):
                 """Convert the notebook file `infile` to the requested output `format`.

                 Parameters
                 ----------
                 infile : string
                   Name of the notebook file to convert.
                 format : string
                   One of 'rst', 'html', 'quick-html' or 'latex'.

                 Raises
                 ------
                 SystemExit
                   If `format` is not one of the known formats.
                 """
                 # XXX: this is just quick and dirty for now. When adding a new format,
                 # make sure to add it to the `known_formats` string above, which gets
                 # printed in the catch-all error below, as well as in the help
                 if format in ('rst', 'html'):
                     rst_name = ConverterRST(infile).render()
                     if format == 'html':
                         # Currently, conversion to html is a 2 step process,
                         # nb->rst->html
                         rst2simplehtml(rst_name)
                     return
                 if format == 'quick-html':
                     ConverterQuickHTML(infile).render()
                     return
                 if format == 'latex':
                     ConverterLaTeX(infile).render()
                     return
                 message = ("Unknown format '%s', " % format +
                            "known formats are: " + known_formats)
                 raise SystemExit(message)
             if __name__ == '__main__':
                 # Command-line entry point: parse the arguments and pass them to main().
                 format_help = 'Output format. Supported formats: \n' + known_formats
                 parser = argparse.ArgumentParser(
                     description=__doc__,
                     formatter_class=argparse.RawTextHelpFormatter)
                 # TODO: consider passing file like object around, rather than filenames
                 # would allow us to process stdin, or even http streams
                 #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
                 # Require a filename as a positional argument
                 parser.add_argument('infile', nargs=1)
                 parser.add_argument('-f', '--format', default='rst', help=format_help)
                 options = parser.parse_args()
                 # 'infile' is declared with nargs=1, so the file name is the first
                 # (and only) item of options.infile.
                 main(infile=options.infile[0], format=options.format)