upstream/ipython Commit - r7371:ffde32c8

1

#!/usr/bin/env python

1

#!/usr/bin/env python

2

"""Convert IPython notebooks to other formats, such as ReST, and HTML.

2

"""Convert IPython notebooks to other formats, such as ReST, and HTML.

3

4

Example:

4

Example:

5

./nbconvert.py --format html file.ipynb

5

./nbconvert.py --format html file.ipynb

6

7

Produces 'file.rst' and 'file.html', along with auto-generated figure files

7

Produces 'file.rst' and 'file.html', along with auto-generated figure files

8

called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,

8

called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,

9

use '--format quick-html' which will do ipynb -> html, but won't look as

9

use '--format quick-html' which will do ipynb -> html, but won't look as

10

pretty.

10

pretty.

11

"""

11

"""

12

#-----------------------------------------------------------------------------

12

#-----------------------------------------------------------------------------

13

# Imports

13

# Imports

14

#-----------------------------------------------------------------------------

14

#-----------------------------------------------------------------------------

15

from __future__ import print_function

15

from __future__ import print_function

16

17

# Stdlib

17

# Stdlib

18

import codecs

18

import codecs

19

import logging

19

import logging

20

import os

20

import os

21

import pprint

21

import pprint

22

import re

22

import re

23

import subprocess

23

import subprocess

24

import sys

24

import sys

25

import json

25

import json

26

import copy

26

import copy

27

from shutil import rmtree

27

from shutil import rmtree

28

29

# Name or path of the Inkscape executable, or None when the OS X application
# bundle is expected but not present on disk.
if sys.platform != 'darwin':
    inkscape = 'inkscape'
else:
    inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    if not os.path.exists(inkscape):
        inkscape = None

34

35

# From IPython

35

# From IPython

36

from IPython.external import argparse

36

from IPython.external import argparse

37

from IPython.nbformat import current as nbformat

37

from IPython.nbformat import current as nbformat

38

from IPython.utils.text import indent

38

from IPython.utils.text import indent

39

from decorators import DocInherit

39

from decorators import DocInherit

40

from IPython.nbformat.v3.nbjson import BytesEncoder

40

from IPython.nbformat.v3.nbjson import BytesEncoder

41

from IPython.utils import py3compat

41

from IPython.utils import py3compat

42

43

#-----------------------------------------------------------------------------

43

#-----------------------------------------------------------------------------

44

# Utility functions

44

# Utility functions

45

#-----------------------------------------------------------------------------

45

#-----------------------------------------------------------------------------

46

47

def DocInherit(f):
    """Identity decorator: return ``f`` unchanged.

    This definition comes after ``from decorators import DocInherit`` in the
    module, so it is the one bound to the name when the classes below are
    created.
    """
    return f

49

50

def remove_fake_files_url(cell):
    """Drop every '/files/' pseudo-path occurrence from ``cell.source``.

    The cell is modified in place; nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')

55

56

57

def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Parameters
    ----------
    src : string
        Text that may contain ANSI color sequences of the form
        ``ESC [ <params> m``.

    Returns
    -------
    string
        ``src`` with every such sequence removed.
    """
    # Accept any number of ';'-separated numeric parameters (including none),
    # so that ``ESC[31m``, ``ESC[1;32;40m`` and ``ESC[m`` are stripped as well
    # as the ``ESC[0m`` / ``ESC[d;ddm`` forms.
    return re.sub(r'\x1b\[[\d;]*m', '', src)

69

70

71

# Pandoc-dependent code

71

# Pandoc-dependent code

72

def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed (the
    exception comes from ``subprocess.Popen``).

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    # stderr must be piped too; otherwise communicate() always returns None
    # for it and the error-reporting branch below can never run.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        print(err, file=sys.stderr)
    return out.decode('utf-8')

96

97

98

def markdown2rst(src):
    """Convert a markdown string to reStructuredText via pandoc.

    This function will raise an error if pandoc is not installed (the
    exception comes from ``subprocess.Popen``).

    Any error messages generated by pandoc are printed to stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc, decoded from UTF-8.
    """
    # stderr must be piped too; otherwise communicate() always returns None
    # for it and the error-reporting branch below can never run.
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'rst'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate(src.encode('utf-8'))
    if err:
        print(err, file=sys.stderr)
    return out.decode('utf-8')

122

123

124

def rst_directive(directive, text=''):
    """Return the list of lines making up a reST directive.

    Parameters
    ----------
    directive : string
      The directive line itself, e.g. ``'.. code:: python'``.
    text : string, optional
      Directive body.  When non-empty it is passed through ``indent`` and
      followed by an empty line.

    Returns
    -------
    list
      ``[directive, '']``, plus the indented body and a trailing ``''`` when
      ``text`` is given.
    """
    block = [directive, '']
    if not text:
        return block
    block += [indent(text), '']
    return block

129

130

#-----------------------------------------------------------------------------

130

#-----------------------------------------------------------------------------

131

# Class declarations

131

# Class declarations

132

#-----------------------------------------------------------------------------

132

#-----------------------------------------------------------------------------

133

134

class ConversionException(Exception):
    """Exception type for notebook conversion errors.

    It adds nothing to ``Exception``.
    """

136

137

138

class Converter(object):
    """Base class for notebook converters.

    Provides the read -> convert -> save driver and the figure extraction
    logic.  The format-specific ``render_*`` / ``_img_lines`` /
    ``_unknown_lines`` methods raise NotImplementedError here and are meant
    to be overridden by subclasses.
    """
    # Encoding used by save() and for figure files written as text.
    default_encoding = 'utf-8'
    # Extension of the output file, without the leading dot.
    extension = str()
    # Index used in the name of the next figure written by _new_figure().
    figures_counter = 0
    # Input notebook path and the values __init__ derives from it.
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    # Preamble options (consulted by ConverterLaTeX.convert).
    with_preamble = True
    user_preamble = None
    # Converted document, as stored by render().
    output = str()
    # When True, subclasses' render_raw emit raw cells as verbatim text.
    raw_as_verbatim = False

    def __init__(self, infile):
        """Record the input path and make sure the figures directory exists.

        The figures directory is ``<infile without extension>_files``; it is
        created if it is not already a directory.
        """
        self.infile = infile
        self.infile_dir = os.path.dirname(infile)
        infile_root = os.path.splitext(infile)[0]
        files_dir = infile_root + '_files'
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = infile_root

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``self.render_unknown`` when there is no
        ``render_<cell_type>`` attribute.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def convert(self, cell_separator='\n'):
        """Convert every cell of ``self.nb`` and return the result as a string.

        Parameters
        ----------
        cell_separator : string
          Text inserted between the renderings of consecutive cells.
        """
        lines = []
        lines.extend(self.optional_header())
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        lines.extend(cell_lines)
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def render(self):
        "read, convert, and save self.infile"
        # Skip reading when a notebook has already been attached as self.nb.
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute output path.

        Parameters
        ----------
        outfile : string, optional
          Destination path; defaults to ``self.outbase + '.' + self.extension``.
        encoding : string, optional
          Encoding applied to ``self.output``; defaults to
          ``self.default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with open(outfile, 'w') as f:
            f.write(self.output.encode(encoding))
        return os.path.abspath(outfile)

    def optional_header(self):
        """Return the list of lines to put before the converted cells."""
        return []

    def optional_footer(self):
        """Return the list of lines to put after the converted cells."""
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Parameters
        ----------
        data : string
          Figure payload: base64 text for 'png', 'jpg' and 'pdf', plain text
          for any other format.
        fmt : string
          Format name, also used as the file extension.

        Returns the path of the file written, inside ``self.files_dir``.
        """
        import base64

        # Only the notebook's base name goes into the figure name.  Using the
        # full infile_root would repeat its directory part under files_dir
        # (or, for an absolute path, make os.path.join discard files_dir).
        figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = base64.b64decode(data)
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Each of the png/svg/jpg/pdf entries present in ``output`` is written
        to a figure file and turned into include lines.

        Returns list.
        """
        lines = []

        for fmt in ['png', 'svg', 'jpg', 'pdf']:
            if fmt in output:
                img_file = self._new_figure(output[fmt], fmt)
                # Subclasses can have format-specific render functions (e.g.,
                # latex has to auto-convert all SVG to PDF first).
                lines_fun = getattr(self, '_%s_lines' % fmt, None)
                if not lines_fun:
                    lines_fun = self._img_lines
                lines.extend(lines_fun(img_file))

        return lines

    def render_stream(self, cell):
        """convert stream part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        The pretty-printed cell is logged as a warning and handed to
        ``_unknown_lines``.

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell:\n%s' % data)
        return self._unknown_lines(data)

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

317

318

319

class ConverterRST(Converter):
    """Converter producing reStructuredText output."""
    extension = 'rst'
    # Underline character for each heading level.
    heading_level = {1: '=', 2: '-', 3: '`', 4: '\'', 5: '.', 6: '~'}

    @DocInherit
    def render_heading(self, cell):
        # Title line followed by an underline of the same length.
        underline_char = self.heading_level[cell.level]
        title = cell.source
        return ['{0}\n{1}\n'.format(title, underline_char * len(title))]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        rendered = ['In[%s]:' % cell.prompt_number, '']
        rendered += rst_directive('.. code:: python', cell.input)

        # Each output is rendered by the method matching its output_type.
        for out in cell.outputs:
            rendered += self.dispatch(out.output_type)(out)

        return rendered

    @DocInherit
    def render_markdown(self, cell):
        # Markdown is translated to reST by pandoc.
        converted = markdown2rst(cell.source)
        return [converted]

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        # Literal block: '::' marker, blank line, indented text, blank line.
        return ['::', '', indent(cell.source), '']

    @DocInherit
    def render_pyout(self, output):
        rendered = ['Out[%s]:' % output.prompt_number, '']

        # output is a dictionary like object with type as a key
        for key, directive in (('latex', '.. math::'),
                               ('text', '.. parsed-literal::')):
            if key in output:
                rendered += rst_directive(directive, getattr(output, key))

        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return ['::', '', indent(traceback_text), '']

    @DocInherit
    def _img_lines(self, img_file):
        directive = '.. image:: %s' % img_file
        return [directive, '']

    @DocInherit
    def render_stream(self, output):
        if 'text' not in output:
            return []
        return [] + rst_directive('.. parsed-literal::', output.text)

    @DocInherit
    def _unknown_lines(self, data):
        warning = rst_directive('.. warning:: Unknown cell')
        return warning + [data]

388

389

390

class ConverterQuickHTML(Converter):
    """Converter producing a bare HTML page directly from the notebook."""
    extension = 'html'

    def in_tag(self, tag, src):
        """Return ``[<tag>, src, </tag>]`` as a list of three strings."""
        opening = '<%s>' % tag
        closing = '</%s>' % tag
        return [opening, src, closing]

    def optional_header(self):
        """Return the lines that open the HTML document."""
        # XXX: inject the IPython standard CSS into here
        return ['<html>', '<head>', '</head>', '', '<body>']

    def optional_footer(self):
        """Return the lines that close the HTML document."""
        return ['</body>', '</html>']

    @DocInherit
    def render_heading(self, cell):
        # The heading level is used directly as the <hN> number.
        level = cell.level
        return ['<h{1}>\n {0}\n</h{1}>'.format(cell.source, level)]

    @DocInherit
    def render_code(self, cell):
        # Cells without input produce no output at all.
        if not cell.input:
            return []

        # One table per cell: a row for the prompt and input, then one row
        # per output.
        rows = [
            '<table>',
            '<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number,
            "<br>\n".join(cell.input.splitlines()),
            '</tt></td></tr>',
        ]

        for out in cell.outputs:
            rows.append('<tr><td></td><td>')
            rows.extend(self.dispatch(out.output_type)(out))
            rows.append('</td></tr>')

        rows.append('</table>')
        return rows

    @DocInherit
    def render_markdown(self, cell):
        # Markdown source is shown as-is inside <pre>.
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_raw(self, cell):
        if not self.raw_as_verbatim:
            return [cell.source]
        return self.in_tag('pre', cell.source)

    @DocInherit
    def render_pyout(self, output):
        # NOTE(review): render_code has already opened a <tr><td> around each
        # output, and the '<td>' emitted here is not closed in this method --
        # confirm the intended table markup.
        prompt_row = ('<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' %
                      output.prompt_number)
        rendered = [prompt_row, '<td>']

        # output is a dictionary like object with type as a key
        for out_type in ('text', 'latex'):
            if out_type not in output:
                continue
            rendered.extend(self.in_tag('pre', indent(output[out_type])))

        return rendered

    @DocInherit
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        traceback_text = remove_ansi('\n'.join(output.traceback))
        return self.in_tag('pre', traceback_text)

    @DocInherit
    def _img_lines(self, img_file):
        tag = '<img src="%s">' % img_file
        return [tag, '']

    @DocInherit
    def render_stream(self, output):
        if 'text' not in output:
            return []
        return [output.text]

    @DocInherit
    def _unknown_lines(self, data):
        heading = ['<h2>Warning:: Unknown cell</h2>']
        return heading + self.in_tag('pre', data)

481

482

483

class ConverterLaTeX(Converter):

484

class ConverterLaTeX(Converter):

484

"""Converts a notebook to a .tex file suitable for pdflatex.

485

"""Converts a notebook to a .tex file suitable for pdflatex.

485

486

Note: this converter *needs*:

487

Note: this converter *needs*:

487

488

- `pandoc`: for all conversion of markdown cells. If your notebook only

489

- `pandoc`: for all conversion of markdown cells. If your notebook only

489

has Raw cells, pandoc will not be needed.

490

has Raw cells, pandoc will not be needed.

490

491

- `inkscape`: if your notebook has SVG figures. These need to be

492

- `inkscape`: if your notebook has SVG figures. These need to be

492

converted to PDF before inclusion in the TeX file, as LaTeX doesn't

493

converted to PDF before inclusion in the TeX file, as LaTeX doesn't

493

understand SVG natively.

494

understand SVG natively.

494

495

You will in general obtain much better final PDF results if you configure

496

You will in general obtain much better final PDF results if you configure

496

the matplotlib backend to create SVG output with

497

the matplotlib backend to create SVG output with

497

498

%config InlineBackend.figure_format = 'svg'

499

%config InlineBackend.figure_format = 'svg'

499

500

(or set the equivalent flag at startup or in your configuration profile).

501

(or set the equivalent flag at startup or in your configuration profile).

501

"""

502

"""

502

extension = 'tex'

503

extension = 'tex'

503

documentclass = 'article'

504

documentclass = 'article'

504

documentclass_options = '11pt,english'

505

documentclass_options = '11pt,english'

505

heading_map = {1: r'\section',

506

heading_map = {1: r'\section',

506

2: r'\subsection',

507

2: r'\subsection',

507

3: r'\subsubsection',

508

3: r'\subsubsection',

508

4: r'\paragraph',

509

4: r'\paragraph',

509

5: r'\subparagraph',

510

5: r'\subparagraph',

510

6: r'\subparagraph'}

511

6: r'\subparagraph'}

511

512

def in_env(self, environment, lines):
    """Return list of environment lines for input lines

    Parameters
    ----------
    environment : string
      Name of the environment to bracket with begin/end.

    lines : string or iterable of strings
      Body of the environment.  A single string is added as one element;
      anything else is iterated and its items added one by one.

    Returns
    -------
    list
      ``\\begin{environment}``, the body, then ``\\end{environment}``.
    """
    # ``basestring`` only exists on Python 2, where it covers both str and
    # unicode; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # u'\\begin...' has the same value as the ur'\begin...' spelling, which
    # Python 3 rejects as a syntax error.
    out = [u'\\begin{%s}' % environment]
    if isinstance(lines, string_types):
        out.append(lines)
    else:  # list
        out.extend(lines)
    out.append(u'\\end{%s}' % environment)
    return out

528

529

def convert(self, cell_separator='\n'):
    """Convert the notebook to LaTeX and return it as a single string.

    Parameters
    ----------
    cell_separator : string
      Passed through to the parent class's ``convert``.

    With ``self.with_preamble`` false only the converted body is returned.
    Otherwise the body is wrapped into a standalone document made of a
    header comment, the ``\\documentclass`` line, the contents of
    ``preamble.tex`` (read from the directory of this module), the optional
    ``self.user_preamble`` file and the ``document`` environment.
    """
    # The main body is done by the logic in the parent class, and that's
    # all we need if preamble support has been turned off.
    body = super(ConverterLaTeX, self).convert(cell_separator)
    if not self.with_preamble:
        return body
    # But if preamble is on, then we need to construct a proper, standalone
    # tex file.

    # Tag the document at the top and set latex class
    final = [r'%% This file was auto-generated by IPython, do NOT edit',
             r'%% Conversion from the original notebook file:',
             r'%% {0}'.format(self.infile),
             r'%%',
             r'\documentclass[%s]{%s}' % (self.documentclass_options,
                                          self.documentclass),
             '',
             ]
    # Load our own preamble, which is stored next to the main file.  The
    # script entry point may be a symlink; realpath resolves it to a usable
    # path, whereas os.readlink returns the raw link target, which can be
    # relative to the link's own directory.
    myfile = os.path.realpath(__file__)
    with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
        final.append(f.read())

    # Load any additional user-supplied preamble
    if self.user_preamble:
        final.extend(['', '%% Adding user preamble from file:',
                      '%% {0}'.format(self.user_preamble), ''])
        with open(self.user_preamble) as f:
            final.append(f.read())

    # Include document body
    final.extend([r'\begin{document}', '',
                  body,
                  r'\end{document}', ''])
    # Return value must be a string
    return '\n'.join(final)

567

568

@DocInherit
def render_heading(self, cell):
    # Look up the LaTeX sectioning command registered in self.heading_map
    # for this heading level, then wrap the cell text in its braces.
    command = self.heading_map[cell.level]
    heading = '%(cmd)s{%(text)s}' % {'cmd': command, 'text': cell.source}
    return [heading]

573

@DocInherit
def render_code(self, cell):
    # A code cell without input renders to nothing (its outputs are
    # skipped as well, since we return before looking at them).
    if not cell.input:
        return []

    # Cell codes first carry input code, we use lstlisting for that.
    # The literals below are spelled u'\\...' instead of ur'\...': the
    # `ur` prefix is a SyntaxError on Python 3, while the escaped form
    # yields exactly the same unicode value on Python 2 and 3.
    lines = [u'\\begin{codecell}']

    lines.extend(self.in_env('codeinput',
                             self.in_env('lstlisting', cell.input)))

    # Render every output with the handler that self.dispatch returns
    # for its output_type, collecting all resulting lines.
    outlines = []
    for output in cell.outputs:
        conv_fn = self.dispatch(output.output_type)
        outlines.extend(conv_fn(output))

    # And then output of many possible types; use a frame for all of it.
    if outlines:
        lines.extend(self.in_env('codeoutput', outlines))

    lines.append(u'\\end{codecell}')

    return lines

597

598

@DocInherit
def _img_lines(self, img_file):
    # Include the image at a fixed 6in width, followed by a paragraph
    # break, all wrapped in a 'center' environment.
    include_cmd = r'\includegraphics[width=6in]{%s}' % img_file
    body = [include_cmd, r'\par']
    return self.in_env('center', body)

603

def _svg_lines(self, img_file):
    """Convert an SVG file to PDF with inkscape and include the PDF.

    The PDF is written next to `img_file`, with the same base name and a
    '.pdf' extension.  Raises subprocess.CalledProcessError if the
    inkscape command exits with a non-zero status.
    """
    stem = os.path.splitext(img_file)[0]
    pdf_file = stem + '.pdf'
    command = [inkscape, '--export-pdf=%s' % pdf_file, img_file]
    subprocess.check_call(command)
    return self._img_lines(pdf_file)

610

@DocInherit
def render_stream(self, output):
    # A stream output without a 'text' entry renders to no lines at all.
    if 'text' not in output:
        return []

    # Surrounding whitespace is stripped before the text is placed in a
    # verbatim environment.
    stripped = output.text.strip()
    return list(self.in_env('verbatim', stripped))

619

@DocInherit
def render_markdown(self, cell):
    # The whole markdown source is converted in one go and returned as a
    # single-element list.
    latex_source = markdown2latex(cell.source)
    return [latex_source]

623

@DocInherit
def render_pyout(self, output):
    # output is a dictionary like object with type as a key
    rendered = []

    if 'latex' in output:
        # NOTE(review): this adds one entry per item of output.latex, so
        # it presumably expects a list of lines rather than one string --
        # confirm against what the notebook reader produces.
        rendered += output.latex

    if 'text' in output:
        rendered += self.in_env('verbatim', output.text)

    return rendered

636

@DocInherit
def render_pyerr(self, output):
    # Note: a traceback is a *list* of frames.  Join them into one text,
    # drop the ANSI escapes, then nest verbatim inside traceback.
    joined = '\n'.join(output.traceback)
    plain = remove_ansi(joined)
    verbatim = self.in_env('verbatim', plain)
    return self.in_env('traceback', verbatim)

643

@DocInherit
def render_raw(self, cell):
    # Raw cells are passed through untouched unless raw_as_verbatim is
    # set, in which case they are wrapped in a verbatim environment.
    if not self.raw_as_verbatim:
        return [cell.source]
    return self.in_env('verbatim', cell.source)

650

@DocInherit
def _unknown_lines(self, data):
    # A bold warning line, followed by the unrecognised data shown
    # verbatim.
    warning = [r'{\vspace{5mm}\bf WARNING:: unknown cell:}']
    return warning + self.in_env('verbatim', data)

655

656

class ConverterNotebook(Converter):
    """
    A converter that is essentially a null-op.
    This exists so it can be subclassed
    for custom handlers of .ipynb files
    that create new .ipynb files.

    What distinguishes this from JSONWriter is that
    subclasses can specify what to do with each type of cell.

    Writes out a notebook file.

    """
    extension = 'ipynb'

    def __init__(self, infile, outbase):
        """Set up the converter for `infile`.

        `outbase` is stored and used as the notebook's metadata name.
        The files directory set up by Converter.__init__ is removed
        again right away.
        """
        Converter.__init__(self, infile)
        self.outbase = outbase
        rmtree(self.files_dir)

    def convert(self):
        """Return the converted notebook as a JSON string.

        The text produced by Converter.convert (called with ',' as its
        argument) is parsed and re-serialized so the result is uniformly
        indented with sorted keys.
        """
        return json.dumps(json.loads(Converter.convert(self, ',')),
                          indent=1, sort_keys=True)

    def optional_header(self):
        """Return the lines opening the notebook JSON, up to the cell list."""
        # The name goes through json.dumps so that quotes, backslashes
        # (e.g. Windows paths) and other special characters in outbase
        # are escaped instead of producing invalid JSON.  Whitespace in
        # these lines is not significant: convert() re-serializes.
        return [
            '{',
            ' "metadata": {',
            '  "name": %s' % json.dumps(self.outbase),
            ' },',
            ' "nbformat": 3,',
            ' "worksheets": [',
            '  {',
            '   "cells": [',
        ]

    def optional_footer(self):
        """Return the lines closing what optional_header opened."""
        return [
            ']',
            '  }',
            ' ]',
            '}',
        ]

    @DocInherit
    def render_heading(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_code(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_markdown(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_raw(self, cell):
        return cell_to_lines(cell)

    @DocInherit
    def render_pyout(self, output):
        # Previously passed the undefined name `cell` (NameError).
        # NOTE(review): cell_to_lines is written for cells; confirm it
        # copes with output nodes before relying on this handler.
        return cell_to_lines(output)

    @DocInherit
    def render_pyerr(self, output):
        # Previously passed the undefined name `cell` (NameError).
        # NOTE(review): see render_pyout -- confirm output nodes are
        # accepted by cell_to_lines.
        return cell_to_lines(output)

724

#-----------------------------------------------------------------------------

725

#-----------------------------------------------------------------------------

725

# Standalone conversion functions

726

# Standalone conversion functions

726

#-----------------------------------------------------------------------------

727

#-----------------------------------------------------------------------------

727

728

def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    The html is written next to `infile`, with the extension replaced by
    '.html'; the name of that file is returned.

    Raises IOError if rst2html cannot be started or writes anything to
    stderr.
    """

    # These are the rst2html options that produce the cleanest, simplest
    # html I could find.  This should help in making it easier to paste
    # into the blogspot html window, though I'm still having problems with
    # linebreaks there...
    #
    # The command is an argument list run without a shell, so a filename
    # containing spaces or shell metacharacters reaches rst2html as one
    # literal argument instead of being re-parsed by the shell.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]
    try:
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError as err:
        # Without a shell a missing executable surfaces as OSError here;
        # keep reporting failures as IOError, as callers saw before.
        raise IOError("could not run rst2html: %s" % err)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find end /div.
    # This may only work if there's a real title defined so we get a 'div class'
    # tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        # The walker resumes right after the '<body>' line found above.
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname

776

#-----------------------------------------------------------------------------

777

#-----------------------------------------------------------------------------

777

# Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions

778

# Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions

778

# but at cell level instead of whole notebook level

779

# but at cell level instead of whole notebook level

779

#-----------------------------------------------------------------------------

780

#-----------------------------------------------------------------------------

780

781

def writes_cell(cell, **kwargs):
    """Serialize a single cell to JSON text.

    Extra keyword arguments are forwarded to json.dumps, except that
    `cls`, `indent`, `sort_keys` and `separators` are always overridden
    here.  `split_lines` (default True) is consumed by this function:
    when true, a deep copy of the cell is passed through
    split_lines_cell before being dumped, so the caller's cell is left
    untouched by that step.
    """
    split = kwargs.pop('split_lines', True)
    kwargs.update(cls=BytesEncoder,
                  indent=3,
                  sort_keys=True,
                  separators=(',', ': '))
    if split:
        cell = split_lines_cell(copy.deepcopy(cell))
    encoded = json.dumps(cell, **kwargs)
    return py3compat.str_to_unicode(encoded, 'utf-8')

790

# Output keys whose string values get split into lines.
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']


def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    String values are replaced, in place, by the list of their lines;
    values that are not strings are left alone.  The (modified) cell is
    returned.
    """
    def _as_lines(text):
        return (text + '\n').splitlines()

    if cell.cell_type != 'code':
        # text, heading cell
        for field in ('source', 'rendered'):
            value = cell.get(field, None)
            if isinstance(value, basestring):
                cell[field] = _as_lines(value)
        return cell

    if 'input' in cell and isinstance(cell.input, basestring):
        cell.input = _as_lines(cell.input)
    for output in cell.outputs:
        for field in _multiline_outputs:
            value = output.get(field, None)
            if isinstance(value, basestring):
                output[field] = _as_lines(value)
    return cell

812

def cell_to_lines(cell):
    '''
    Write a cell to json, returning the split lines.

    The cell passed in is not modified: writes_cell does the line
    splitting itself, on a deep copy.  (An extra in-place
    split_lines_cell call used to run first; it did not change the
    result but rewrote the caller's cell as a side effect.)
    '''
    s = writes_cell(cell).strip()
    return s.split('\n')

820

821

known_formats = "rst (default), html, quick-html, latex"


def main(infile, format='rst'):
    """Convert the notebook `infile` to the requested output `format`.

    Exits through SystemExit, with a message listing the known formats,
    when `format` is not one of them.
    """
    # XXX: this is just quick and dirty for now. When adding a new format,
    # make sure to add it to the `known_formats` string above, which gets
    # printed in the catch-all at the bottom, as well as in the help
    if format in ('rst', 'html'):
        rst_path = ConverterRST(infile).render()
        if format == 'html':
            # Currently, conversion to html is a 2 step process,
            # nb->rst->html
            rst2simplehtml(rst_path)
        return

    if format == 'quick-html':
        ConverterQuickHTML(infile).render()
        return

    if format == 'latex':
        ConverterLaTeX(infile).render()
        return

    raise SystemExit("Unknown format '%s', " % format +
                     "known formats are: " + known_formats)

846

#-----------------------------------------------------------------------------

847

#-----------------------------------------------------------------------------

847

# Script main

848

# Script main

848

#-----------------------------------------------------------------------------

849

#-----------------------------------------------------------------------------

849

850

if __name__ == '__main__':
    # Command line entry point: one notebook filename, plus an optional
    # output format.
    format_help = 'Output format. Supported formats: \n' + known_formats
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)

    #Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    parser.add_argument('-f', '--format', default='rst', help=format_help)
    options = parser.parse_args()
    # nargs=1 yields a one-element list; hand main() the bare filename.
    notebook_path = options.infile[0]
    main(infile=notebook_path, format=options.format)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             #!/usr/bin/env python
             """Convert IPython notebooks to other formats, such as ReST, and HTML.
             Example:
               ./nbconvert.py --format html file.ipynb
             Produces 'file.rst' and 'file.html', along with auto-generated figure files
             called nb_figure_NN.png. To avoid the two-step process, ipynb -> rst -> html,
             use '--format quick-html' which will do ipynb -> html, but won't look as
             pretty.
             """
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             from __future__ import print_function
             # Stdlib
             import codecs
             import logging
             import os
             import pprint
             import re
             import subprocess
             import sys
             import json
             import copy
             from shutil import rmtree
             # Executable used by ConverterLaTeX._svg_lines to turn SVG figures into PDF.
             # By default the bare command name is used; on OS X the standard
             # application-bundle location is used instead, and the value is set to None
             # when that file does not exist.
             inkscape = 'inkscape'
             if sys.platform == 'darwin':
                 inkscape = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
                 if not os.path.exists(inkscape):
                     inkscape = None
             # From IPython
             from IPython.external import argparse
             from IPython.nbformat import current as nbformat
             from IPython.utils.text import indent
             from decorators import DocInherit
             from IPython.nbformat.v3.nbjson import BytesEncoder
             from IPython.utils import py3compat
             #-----------------------------------------------------------------------------
             # Utility functions
             #-----------------------------------------------------------------------------
             def DocInherit(f):
                 """Identity decorator: hand ``f`` back untouched.

                 This definition comes after ``from decorators import DocInherit`` and
                 therefore replaces the imported name for the rest of the module.
                 """
                 return f
             def remove_fake_files_url(cell):
                 """Drop every '/files/' pseudo-path occurrence from ``cell.source``.

                 The cell is modified in place and nothing is returned.
                 """
                 cell.source = cell.source.replace('/files/', '')
             def remove_ansi(src):
                 """Strip all ANSI color escape sequences from input string.

                 Any SGR sequence of the form ``ESC [ <digits and semicolons> m`` is
                 removed, e.g. ``ESC[0m``, ``ESC[1m``, ``ESC[0;31m``, ``ESC[01;34m`` and
                 the bare reset ``ESC[m``.

                 Parameters
                 ----------
                 src : string

                 Returns
                 -------
                 string
                 """
                 # The previous pattern only recognised 'ESC[0m' and 'ESC[d;ddm', which
                 # left bold/underline and zero-padded color codes in the output.
                 return re.sub(r'\033\[[0-9;]*m', '', src)
             # Pandoc-dependent code
             def markdown2latex(src):
                 """Translate a markdown string into LaTeX by piping it through pandoc.

                 ``pandoc -f markdown -t latex`` is started as a child process, so an
                 error is raised when pandoc is not installed.  Only stdin and stdout
                 are piped; pandoc's stderr is not captured here.

                 Parameters
                 ----------
                 src : string
                   Input string, assumed to be valid markdown.

                 Returns
                 -------
                 out : string
                   pandoc's output, decoded from UTF-8.
                 """
                 pandoc_cmd = ['pandoc', '-f', 'markdown', '-t', 'latex']
                 proc = subprocess.Popen(pandoc_cmd, stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE)
                 stdout_data, stderr_data = proc.communicate(src.encode('utf-8'))
                 if stderr_data:
                     print(stderr_data, file=sys.stderr)
                 return unicode(stdout_data, 'utf-8')
             def markdown2rst(src):
                 """Translate a markdown string into reStructuredText via pandoc.

                 ``pandoc -f markdown -t rst`` is started as a child process, so an
                 error is raised when pandoc is not installed.  Only stdin and stdout
                 are piped; pandoc's stderr is not captured here.

                 Parameters
                 ----------
                 src : string
                   Input string, assumed to be valid markdown.

                 Returns
                 -------
                 out : string
                   pandoc's output, decoded from UTF-8.
                 """
                 pandoc_cmd = ['pandoc', '-f', 'markdown', '-t', 'rst']
                 proc = subprocess.Popen(pandoc_cmd, stdin=subprocess.PIPE,
                                         stdout=subprocess.PIPE)
                 stdout_data, stderr_data = proc.communicate(src.encode('utf-8'))
                 if stderr_data:
                     print(stderr_data, file=sys.stderr)
                 return unicode(stdout_data, 'utf-8')
             def rst_directive(directive, text=''):
                 """Return the lines of a reST directive, optionally with an indented body.

                 The result always starts with ``directive`` and an empty line; when
                 ``text`` is non-empty, the indented text and another empty line follow.
                 """
                 if not text:
                     return [directive, '']
                 return [directive, '', indent(text), '']
             #-----------------------------------------------------------------------------
             # Class declarations
             #-----------------------------------------------------------------------------
             class ConversionException(Exception):
                 """Dedicated exception type for errors specific to notebook conversion."""
             class Converter(object):
                 """Base class shared by all notebook converters.

                 Subclasses set ``extension`` and implement the ``render_*`` hooks together
                 with ``_img_lines`` and ``_unknown_lines``.  ``render()`` then reads the
                 notebook, converts every cell and writes '<input root>.<extension>'.
                 """
                 default_encoding = 'utf-8'
                 extension = str()
                 figures_counter = 0
                 infile = str()
                 infile_dir = str()
                 infile_root = str()
                 files_dir = str()
                 with_preamble = True
                 user_preamble = None
                 output = str()
                 raw_as_verbatim = False
                 def __init__(self, infile):
                     """Record the input path and create the '<root>_files' directory.

                     Parameters
                     ----------
                     infile : string
                       Path of the notebook file to convert.
                     """
                     self.infile = infile
                     self.infile_dir = os.path.dirname(infile)
                     infile_root = os.path.splitext(infile)[0]
                     files_dir = infile_root + '_files'
                     if not os.path.isdir(files_dir):
                         os.mkdir(files_dir)
                     self.infile_root = infile_root
                     self.files_dir = files_dir
                     self.outbase = infile_root
                 def dispatch(self, cell_type):
                     """return cell_type dependent render method,  for example render_code

                     Falls back to ``render_unknown`` when no ``render_<cell_type>``
                     attribute exists.
                     """
                     return getattr(self, 'render_' + cell_type, self.render_unknown)
                 def convert(self, cell_separator='\n'):
                     """Convert every cell of ``self.nb`` and return the whole document.

                     Parameters
                     ----------
                     cell_separator : string
                       Text inserted between the rendered cells.

                     Returns
                     -------
                     string
                       Header lines, converted cells and footer lines joined by newlines.
                     """
                     lines = []
                     lines.extend(self.optional_header())
                     converted_cells = []
                     for worksheet in self.nb.worksheets:
                         for cell in worksheet.cells:
                             conv_fn = self.dispatch(cell.cell_type)
                             # Markdown and raw cells may reference the '/files/'
                             # pseudo-path; strip it before rendering.
                             if cell.cell_type in ('markdown', 'raw'):
                                 remove_fake_files_url(cell)
                             converted_cells.append('\n'.join(conv_fn(cell)))
                     cell_lines = cell_separator.join(converted_cells).split('\n')
                     lines.extend(cell_lines)
                     lines.extend(self.optional_footer())
                     return u'\n'.join(lines)
                 def render(self):
                     "read, convert, and save self.infile"
                     # Only read from disk when no notebook has been attached yet, so
                     # callers may assign self.nb themselves before rendering.
                     if not hasattr(self, 'nb'):
                         self.read()
                     self.output = self.convert()
                     return self.save()
                 def read(self):
                     "read and parse notebook into NotebookNode called self.nb"
                     with open(self.infile) as f:
                         self.nb = nbformat.read(f, 'json')
                 def save(self, outfile=None, encoding=None):
                     """Write ``self.output`` to disk and return the absolute output path.

                     Parameters
                     ----------
                     outfile : string, optional
                       Destination path; defaults to ``self.outbase + '.' + self.extension``.
                     encoding : string, optional
                       Encoding applied to ``self.output``; defaults to
                       ``self.default_encoding``.
                     """
                     if outfile is None:
                         outfile = self.outbase + '.' + self.extension
                     if encoding is None:
                         encoding = self.default_encoding
                     # The payload is already encoded, so write it through a binary
                     # handle rather than a text-mode one.
                     with open(outfile, 'wb') as f:
                         f.write(self.output.encode(encoding))
                     return os.path.abspath(outfile)
                 def optional_header(self):
                     """Return the list of lines emitted before the cells (none here)."""
                     return []
                 def optional_footer(self):
                     """Return the list of lines emitted after the cells (none here)."""
                     return []
                 def _new_figure(self, data, fmt):
                     """Create a new figure file in the given format.

                     The file is named '<notebook name>_fig_NN.<fmt>' and is written
                     inside ``self.files_dir``; the figure counter is incremented.

                     Parameters
                     ----------
                     data : string
                       Figure payload: base64 text for png/jpg/pdf, raw text otherwise.
                     fmt : string
                       Figure format, used as the file extension.

                     Returns
                     -------
                     string
                       Path of the newly written file.
                     """
                     import base64
                     # Use only the notebook's base name: infile_root may carry a
                     # directory part, and joining that onto files_dir would point
                     # outside of (or at a missing directory below) files_dir.
                     figname = '%s_fig_%02i.%s' % (os.path.basename(self.infile_root),
                                                   self.figures_counter, fmt)
                     self.figures_counter += 1
                     fullname = os.path.join(self.files_dir, figname)
                     # Binary files are base64-encoded, SVG is already XML
                     if fmt in ('png', 'jpg', 'pdf'):
                         data = base64.b64decode(data)
                         fopen = lambda fname: open(fname, 'wb')
                     else:
                         fopen = lambda fname: codecs.open(fname, 'wb', self.default_encoding)
                     with fopen(fullname) as f:
                         f.write(data)
                     return fullname
                 def render_heading(self, cell):
                     """convert a heading cell
                     Returns list."""
                     raise NotImplementedError
                 def render_code(self, cell):
                     """Convert a code cell
                     Returns list."""
                     raise NotImplementedError
                 def render_markdown(self, cell):
                     """convert a markdown cell
                     Returns list."""
                     raise NotImplementedError
                 def render_pyout(self, output):
                     """convert pyout part of a code cell
                     Returns list."""
                     raise NotImplementedError
                 def render_pyerr(self, output):
                     """convert pyerr part of a code cell
                     Returns list."""
                     raise NotImplementedError
                 def _img_lines(self, img_file):
                     """Return list of lines to include an image file."""
                     # Note: subclasses may choose to implement format-specific _FMT_lines
                     # methods if they so choose (FMT in {png, svg, jpg, pdf}).
                     raise NotImplementedError
                 def render_display_data(self, output):
                     """convert display data from the output of a code cell
                     Returns list.
                     """
                     lines = []
                     for fmt in ['png', 'svg', 'jpg', 'pdf']:
                         if fmt in output:
                             img_file = self._new_figure(output[fmt], fmt)
                             # Subclasses can have format-specific render functions (e.g.,
                             # latex has to auto-convert all SVG to PDF first).
                             lines_fun = getattr(self, '_%s_lines' % fmt, None)
                             if not lines_fun:
                                 lines_fun = self._img_lines
                             lines.extend(lines_fun(img_file))
                     return lines
                 def render_stream(self, cell):
                     """convert stream part of a code cell
                     Returns list."""
                     raise NotImplementedError
                 def render_raw(self, cell):
                     """convert a cell with raw text
                     Returns list."""
                     raise NotImplementedError
                 def render_unknown(self, cell):
                     """Render cells of unknown type
                     Returns list."""
                     data = pprint.pformat(cell)
                     logging.warning('Unknown cell:\n%s' % data)
                     return self._unknown_lines(data)
                 def _unknown_lines(self, data):
                     """Return list of lines for an unknown cell.

                     Parameters
                     ----------
                     data : str
                       The content of the unknown data as a single string.
                     """
                     raise NotImplementedError
             class ConverterRST(Converter):
                 """Converter that renders a notebook as reStructuredText ('.rst')."""
                 extension = 'rst'
                 # Underline character used for each heading level.
                 heading_level = {1: '=', 2: '-', 3: '`', 4: "'", 5: '.', 6: '~'}
                 @DocInherit
                 def render_heading(self, cell):
                     underline_char = self.heading_level[cell.level]
                     title = cell.source
                     # Title on one line, underline of equal length on the next.
                     return ['{0}\n{1}\n'.format(title, underline_char * len(title))]
                 @DocInherit
                 def render_code(self, cell):
                     # Cells without input produce no output at all.
                     if not cell.input:
                         return []
                     rendered = ['In[%s]:' % cell.prompt_number, '']
                     rendered += rst_directive('.. code:: python', cell.input)
                     for output in cell.outputs:
                         render_output = self.dispatch(output.output_type)
                         rendered += render_output(output)
                     return rendered
                 @DocInherit
                 def render_markdown(self, cell):
                     rst_text = markdown2rst(cell.source)
                     return [rst_text]
                 @DocInherit
                 def render_raw(self, cell):
                     if not self.raw_as_verbatim:
                         return [cell.source]
                     # Literal block: '::' marker followed by the indented source.
                     return ['::', '', indent(cell.source), '']
                 @DocInherit
                 def render_pyout(self, output):
                     rendered = ['Out[%s]:' % output.prompt_number, '']
                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         rendered += rst_directive('.. math::', output.latex)
                     if 'text' in output:
                         rendered += rst_directive('.. parsed-literal::', output.text)
                     return rendered
                 @DocInherit
                 def render_pyerr(self, output):
                     # Note: a traceback is a *list* of frames.
                     traceback_text = remove_ansi('\n'.join(output.traceback))
                     return ['::', '', indent(traceback_text), '']
                 @DocInherit
                 def _img_lines(self, img_file):
                     image_directive = '.. image:: %s' % img_file
                     return [image_directive, '']
                 @DocInherit
                 def render_stream(self, output):
                     if 'text' not in output:
                         return []
                     return list(rst_directive('.. parsed-literal::', output.text))
                 @DocInherit
                 def _unknown_lines(self, data):
                     warning = rst_directive('.. warning:: Unknown cell')
                     return warning + [data]
             class ConverterQuickHTML(Converter):
                 """Converter that writes a bare-bones HTML page directly ('.html')."""
                 extension = 'html'
                 def in_tag(self, tag, src):
                     """Return a list of elements bracketed by the given tag"""
                     opening = '<%s>' % tag
                     closing = '</%s>' % tag
                     return [opening, src, closing]
                 def optional_header(self):
                     """Return the lines that open the HTML document."""
                     # XXX: inject the IPython standard CSS into here
                     header = """<html>
                     <head>
                     </head>
                     <body>
                     """
                     return header.splitlines()
                 def optional_footer(self):
                     """Return the lines that close the HTML document."""
                     footer = """</body>
                     </html>
                     """
                     return footer.splitlines()
                 @DocInherit
                 def render_heading(self, cell):
                     level = cell.level
                     return ['<h{1}>\n  {0}\n</h{1}>'.format(cell.source, level)]
                 @DocInherit
                 def render_code(self, cell):
                     # Cells without input produce no output at all.
                     if not cell.input:
                         return []
                     # One table per cell: first row holds the prompt and the input,
                     # each output then gets a row of its own.
                     rows = [
                         '<table>',
                         '<tr><td><tt>In [<b>%s</b>]:</tt></td><td><tt>' % cell.prompt_number,
                         "<br>\n".join(cell.input.splitlines()),
                         '</tt></td></tr>',
                     ]
                     for output in cell.outputs:
                         rows.append('<tr><td></td><td>')
                         render_output = self.dispatch(output.output_type)
                         rows += render_output(output)
                         rows.append('</td></tr>')
                     rows.append('</table>')
                     return rows
                 @DocInherit
                 def render_markdown(self, cell):
                     # Markdown is not interpreted, only shown preformatted.
                     return self.in_tag('pre', cell.source)
                 @DocInherit
                 def render_raw(self, cell):
                     if not self.raw_as_verbatim:
                         return [cell.source]
                     return self.in_tag('pre', cell.source)
                 @DocInherit
                 def render_pyout(self, output):
                     prompt_row = ('<tr><td><tt>Out[<b>%s</b>]:</tt></td></tr>' %
                                   output.prompt_number)
                     rendered = [prompt_row, '<td>']
                     # output is a dictionary like object with type as a key
                     for out_type in ('text', 'latex'):
                         if out_type in output:
                             rendered += self.in_tag('pre', indent(output[out_type]))
                     return rendered
                 @DocInherit
                 def render_pyerr(self, output):
                     # Note: a traceback is a *list* of frames.
                     traceback_text = remove_ansi('\n'.join(output.traceback))
                     return self.in_tag('pre', traceback_text)
                 @DocInherit
                 def _img_lines(self, img_file):
                     image_tag = '<img src="%s">' % img_file
                     return [image_tag, '']
                 @DocInherit
                 def render_stream(self, output):
                     if 'text' not in output:
                         return []
                     return [output.text]
                 @DocInherit
                 def _unknown_lines(self, data):
                     warning = ['<h2>Warning:: Unknown cell</h2>']
                     return warning + self.in_tag('pre', data)
             class ConverterLaTeX(Converter):
                 """Converts a notebook to a .tex file suitable for pdflatex.

                 Note: this converter *needs*:

                 - `pandoc`: for all conversion of markdown cells.  If your notebook only
                    has Raw cells, pandoc will not be needed.

                 -  `inkscape`: if your notebook has SVG figures.  These need to be
                    converted to PDF before inclusion in the TeX file, as LaTeX doesn't
                    understand SVG natively.

                 You will in general obtain much better final PDF results if you configure
                 the matplotlib backend to create SVG output with

                 %config InlineBackend.figure_format = 'svg'

                 (or set the equivalent flag at startup or in your configuration profile).
                 """
                 extension = 'tex'
                 documentclass = 'article'
                 documentclass_options = '11pt,english'
                 # Sectioning command for each heading level; levels 5 and 6 share
                 # \subparagraph.
                 heading_map = {1: r'\section',
                                2: r'\subsection',
                                3: r'\subsubsection',
                                4: r'\paragraph',
                                5: r'\subparagraph',
                                6: r'\subparagraph'}
                 def in_env(self, environment, lines):
                     """Return list of environment lines for input lines

                     Parameters
                     ----------
                     environment : string
                       Name of the environment to bracket with begin/end.
                     lines : string or list of strings
                       Body of the environment.  A single string is inserted as one
                       element, a list is spliced in element by element.
                     """
                     out = [u'\\begin{%s}' % environment]
                     if isinstance(lines, basestring):
                         out.append(lines)
                     else:  # list
                         out.extend(lines)
                     out.append(u'\\end{%s}' % environment)
                     return out
                 def convert(self, cell_separator='\n'):
                     """Return the LaTeX source for the notebook as a single string.

                     When ``self.with_preamble`` is false only the converted body is
                     returned; otherwise the body is wrapped into a standalone document
                     that includes 'preamble.tex' (stored next to this script) and the
                     optional ``self.user_preamble`` file.

                     Parameters
                     ----------
                     cell_separator : string
                       Text inserted between rendered cells, as in ``Converter.convert``.
                     """
                     # The main body is done by the logic in the parent class, and that's
                     # all we need if preamble support has been turned off.
                     body = super(ConverterLaTeX, self).convert(cell_separator)
                     if not self.with_preamble:
                         return body
                     # But if preamble is on, then we need to construct a proper, standalone
                     # tex file.
                     # Tag the document at the top and set latex class
                     final = [ r'%% This file was auto-generated by IPython, do NOT edit',
                               r'%% Conversion from the original notebook file:',
                               r'%% {0}'.format(self.infile),
                               r'%%',
                               r'\documentclass[%s]{%s}' % (self.documentclass_options,
                                                            self.documentclass),
                               '',
                              ]
                     # Load our own preamble, which is stored next to the main file.  We
                     # need to be careful in case the script entry point is a symlink;
                     # realpath also copes with relative and chained links.
                     myfile = os.path.realpath(__file__)
                     with open(os.path.join(os.path.dirname(myfile), 'preamble.tex')) as f:
                         final.append(f.read())
                     # Load any additional user-supplied preamble
                     if self.user_preamble:
                         final.extend(['', '%% Adding user preamble from file:',
                                       '%% {0}'.format(self.user_preamble), ''])
                         with open(self.user_preamble) as f:
                             final.append(f.read())
                     # Include document body
                     final.extend([ r'\begin{document}', '',
                                    body,
                                    r'\end{document}', ''])
                     # Return value must be a string
                     return '\n'.join(final)
                 @DocInherit
                 def render_heading(self, cell):
                     marker = self.heading_map[cell.level]
                     return ['%s{%s}' % (marker, cell.source) ]
                 @DocInherit
                 def render_code(self, cell):
                     if not cell.input:
                         return []
                     # Cell codes first carry input code, we use lstlisting for that
                     lines = [u'\\begin{codecell}']
                     lines.extend(self.in_env('codeinput',
                                           self.in_env('lstlisting', cell.input)))
                     outlines = []
                     for output in cell.outputs:
                         conv_fn = self.dispatch(output.output_type)
                         outlines.extend(conv_fn(output))
                     # And then output of many possible types; use a frame for all of it.
                     if outlines:
                         lines.extend(self.in_env('codeoutput', outlines))
                     lines.append(u'\\end{codecell}')
                     return lines
                 @DocInherit
                 def _img_lines(self, img_file):
                     return self.in_env('center',
                             [r'\includegraphics[width=6in]{%s}' % img_file, r'\par'])
                 def _svg_lines(self, img_file):
                     """Convert an SVG figure to PDF with Inkscape and include the PDF.

                     Raises ConversionException when the module-level ``inkscape``
                     setting is None, i.e. the executable was not found.
                     """
                     if inkscape is None:
                         raise ConversionException(
                             'Inkscape is needed to convert SVG figures to PDF, '
                             'but it was not found')
                     base_file = os.path.splitext(img_file)[0]
                     pdf_file = base_file + '.pdf'
                     subprocess.check_call([ inkscape, '--export-pdf=%s' % pdf_file,
                                            img_file])
                     return self._img_lines(pdf_file)
                 @DocInherit
                 def render_stream(self, output):
                     lines = []
                     if 'text' in output:
                         lines.extend(self.in_env('verbatim', output.text.strip()))
                     return lines
                 @DocInherit
                 def render_markdown(self, cell):
                     return [markdown2latex(cell.source)]
                 @DocInherit
                 def render_pyout(self, output):
                     lines = []
                     # output is a dictionary like object with type as a key
                     if 'latex' in output:
                         latex = output.latex
                         # A plain string must be appended as one element; extending
                         # the list with it would add one entry per character.
                         if isinstance(latex, basestring):
                             lines.append(latex)
                         else:
                             lines.extend(latex)
                     if 'text' in output:
                         lines.extend(self.in_env('verbatim', output.text))
                     return lines
                 @DocInherit
                 def render_pyerr(self, output):
                     # Note: a traceback is a *list* of frames.
                     return self.in_env('traceback',
                                     self.in_env('verbatim',
                                              remove_ansi('\n'.join(output.traceback))))
                 @DocInherit
                 def render_raw(self, cell):
                     if self.raw_as_verbatim:
                         return self.in_env('verbatim', cell.source)
                     else:
                         return [cell.source]
                 @DocInherit
                 def _unknown_lines(self, data):
                     return [r'{\vspace{5mm}\bf WARNING:: unknown cell:}'] + \
                       self.in_env('verbatim', data)
             class ConverterNotebook(Converter):
                 """
                 A converter that is essentially a null-op.
                 This exists so it can be subclassed
                 for custom handlers of .ipynb files
                 that create new .ipynb files.

                 What distinguishes this from JSONWriter is that
                 subclasses can specify what to do with each type of cell.

                 Writes out a notebook file.
                 """
                 extension = 'ipynb'
                 def __init__(self, infile, outbase):
                     """Set up the converter.

                     Parameters
                     ----------
                     infile : string
                       Path of the notebook to read.
                     outbase : string
                       Output path without extension; also used as the notebook name.
                     """
                     Converter.__init__(self, infile)
                     self.outbase = outbase
                     # NOTE(review): this removes the '<root>_files' directory even when
                     # it already existed before Converter.__init__ ran -- confirm that
                     # discarding pre-existing content is intended.
                     rmtree(self.files_dir)
                 def convert(self, cell_separator=','):
                     """Return the notebook as normalized JSON text.

                     The cells are rendered as JSON fragments joined by
                     ``cell_separator``; the result is parsed and dumped again so the
                     output is uniformly indented with sorted keys.
                     """
                     return json.dumps(json.loads(Converter.convert(self, cell_separator)),
                                       indent=1, sort_keys=True)
                 def optional_header(self):
                     """Return the JSON lines that precede the list of cells."""
                     # json.dumps quotes and escapes the name, so names containing
                     # quotes or backslashes no longer yield invalid JSON.  Whitespace
                     # is irrelevant because convert() re-parses the document.
                     return ['{',
                             ' "metadata": {',
                             ' "name": %s' % json.dumps(self.outbase),
                             ' },',
                             ' "nbformat": 3,',
                             ' "worksheets": [',
                             ' {',
                             ' "cells": [']
                 def optional_footer(self):
                     """Return the JSON lines that close the cell list and the notebook."""
                     return [']',
                             '  }',
                             ' ]',
                             '}']
                 @DocInherit
                 def render_heading(self, cell):
                     return cell_to_lines(cell)
                 @DocInherit
                 def render_code(self, cell):
                     return cell_to_lines(cell)
                 @DocInherit
                 def render_markdown(self, cell):
                     return cell_to_lines(cell)
                 @DocInherit
                 def render_raw(self, cell):
                     return cell_to_lines(cell)
                 @DocInherit
                 def render_pyout(self, output):
                     # The parameter is named 'output'; the former reference to 'cell'
                     # raised NameError.
                     # NOTE(review): cell_to_lines reads a cell_type attribute, which
                     # outputs may not carry -- confirm before dispatching outputs here.
                     return cell_to_lines(output)
                 @DocInherit
                 def render_pyerr(self, output):
                     # See render_pyout: use the actual parameter name.
                     return cell_to_lines(output)
             #-----------------------------------------------------------------------------
             # Standalone conversion functions
             #-----------------------------------------------------------------------------
             def rst2simplehtml(infile):
                 """Convert a rst file to simplified html suitable for blogger.

                 This just runs rst2html with certain parameters to produce really simple
                 html and strips the document header, so the resulting file can be easily
                 pasted into a blogger edit window.

                 Parameters
                 ----------
                 infile : string
                   Path of the reST file to convert.

                 Returns
                 -------
                 string
                   Path of the written html file (``infile`` with an '.html' extension).

                 Raises
                 ------
                 IOError
                   If rst2html writes anything to stderr.
                 """
                 # This is the set of rst2html options that produces the cleanest,
                 # simplest html I could find.  This should help in making it easier to
                 # paste into the blogspot html window, though I'm still having problems
                 # with linebreaks there...
                 # The command is passed as an argument list (no shell), so file names
                 # with spaces or shell metacharacters are handed over verbatim.
                 cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
                        '--no-generator', '--no-datestamp', '--no-source-link',
                        '--no-toc-backlinks', '--no-section-numbering',
                        '--strip-comments', infile]
                 proc = subprocess.Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE)
                 html, stderr = proc.communicate()
                 if stderr:
                     raise IOError(stderr)
                 # Make an iterator so breaking out holds state.  Our implementation of
                 # searching for the html body below is basically a trivial little state
                 # machine, so we need this.
                 walker = iter(html.splitlines())
                 # Find start of main text, break out to then print until we find end /div.
                 # This may only work if there's a real title defined so we get a 'div class'
                 # tag, I haven't really tried.
                 for line in walker:
                     if line.startswith('<body>'):
                         break
                 newfname = os.path.splitext(infile)[0] + '.html'
                 with open(newfname, 'w') as f:
                     # Copy everything between the <body> and </body> lines.
                     for line in walker:
                         if line.startswith('</body>'):
                             break
                         f.write(line)
                         f.write('\n')
                 return newfname
             #-----------------------------------------------------------------------------
             # Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions
             # but at cell level instead of whole notebook level
             #-----------------------------------------------------------------------------
             def writes_cell(cell, **kwargs):
                 """Serialize a single cell to a unicode JSON string.

                 The encoder class, indentation, key sorting and separators are always
                 forced; other keyword arguments are forwarded to ``json.dumps``.  Unless
                 ``split_lines=False`` is given, multi-line fields are split on a deep
                 copy of the cell first, leaving the caller's cell untouched.
                 """
                 do_split = kwargs.pop('split_lines', True)
                 kwargs.update(cls=BytesEncoder, indent=3, sort_keys=True,
                               separators=(',', ': '))
                 if do_split:
                     cell = split_lines_cell(copy.deepcopy(cell))
                 dumped = json.dumps(cell, **kwargs)
                 return py3compat.str_to_unicode(dumped, 'utf-8')
             _multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
             def split_lines_cell(cell):
                 """
                 Split the multi-line string fields of a single cell into lists of
                 lines, in the manner of IPython.nbformat.v3.rwbase.split_lines.

                 The cell is modified in place and also returned.  For a code cell the
                 'input' field and every output field named in `_multiline_outputs` are
                 split; for any other cell type the 'source' and 'rendered' fields are
                 split.  Fields that are missing or are not strings are left alone.
                 """
                 def _to_lines(text):
                     # The extra newline makes an empty string become [''] rather than
                     # [], and keeps a trailing newline visible as a final '' entry.
                     return (text + '\n').splitlines()

                 if cell.cell_type != 'code':
                     # text, heading cell
                     for field in ['source', 'rendered']:
                         value = cell.get(field, None)
                         if isinstance(value, basestring):
                             cell[field] = _to_lines(value)
                     return cell

                 if 'input' in cell and isinstance(cell.input, basestring):
                     cell.input = _to_lines(cell.input)
                 for out in cell.outputs:
                     for field in _multiline_outputs:
                         value = out.get(field, None)
                         if isinstance(value, basestring):
                             out[field] = _to_lines(value)
                 return cell
             def cell_to_lines(cell):
                 '''
                 Write a cell to json, returning the split lines.

                 The cell is serialized with writes_cell(), surrounding whitespace is
                 stripped from the resulting text, and the text is returned as a list
                 of its lines.  The cell passed in is not modified.
                 '''
                 # writes_cell() splits multi-line strings on a deep copy of the cell, so
                 # a separate in-place split_lines_cell() pass here would only mutate the
                 # caller's cell without changing the JSON that is produced.
                 s = writes_cell(cell, split_lines=True).strip()
                 return s.split('\n')
             known_formats = "rst (default), html, quick-html, latex"
             def main(infile, format='rst'):
                 """Convert the notebook `infile` to the requested output `format`.

                 Supported values for `format` are 'rst', 'html', 'quick-html' and
                 'latex'.  Any other value terminates the program through SystemExit
                 with a message listing the known formats.
                 """
                 # XXX: this is just quick and dirty for now. When adding a new format,
                 # also add it to the `known_formats` string above, which is shown both
                 # in the error raised below and in the command-line help.
                 if format == 'rst':
                     ConverterRST(infile).render()
                     return
                 if format == 'html':
                     # Conversion to html is currently a 2 step process: nb->rst->html
                     rst_path = ConverterRST(infile).render()
                     rst2simplehtml(rst_path)
                     return
                 if format == 'quick-html':
                     ConverterQuickHTML(infile).render()
                     return
                 if format == 'latex':
                     ConverterLaTeX(infile).render()
                     return
                 message = ("Unknown format '%s', " % format +
                            "known formats are: " + known_formats)
                 raise SystemExit(message)
             #-----------------------------------------------------------------------------
             # Script main
             #-----------------------------------------------------------------------------
             if __name__ == '__main__':
                 # Command-line entry point: parse the arguments and run the conversion.
                 cli = argparse.ArgumentParser(
                     description=__doc__,
                     formatter_class=argparse.RawTextHelpFormatter)
                 # TODO: consider passing a file-like object around rather than
                 # filenames; that would allow processing stdin, or even http streams:
                 #cli.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
                 # A notebook filename is required as a positional argument.
                 cli.add_argument('infile', nargs=1)
                 format_help = 'Output format. Supported formats: \n' + known_formats
                 cli.add_argument('-f', '--format', default='rst', help=format_help)
                 options = cli.parse_args()
                 # nargs=1 makes argparse store the filename in a one-element list.
                 main(infile=options.infile[0], format=options.format)