##// END OF EJS Templates
Added an option to disable syntax highlighting in code blocks. Simply add the -p or --plain_output tag to the command. This is a fix for issue #21
Ivan Djokic -
Show More
@@ -1,365 +1,366 b''
1 1 from __future__ import print_function, absolute_import
2 2 from converters.utils import remove_fake_files_url
3 3
4 4 # Stdlib
5 5 import codecs
6 6 import io
7 7 import logging
8 8 import os
9 9 import pprint
10 10 import re
11 11 from types import FunctionType
12 12
13 13 # From IPython
14 14 from IPython.nbformat import current as nbformat
15 15
16 16 # local
17 17
def clean_filename(filename):
    """
    Replace every non-alphanumeric character of a filename by an underscore.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        The input with each character outside ``a-z``, ``A-Z``, ``0-9``
        and ``_`` replaced by ``_`` (the length is unchanged).
    """
    return re.sub(r'[^a-zA-Z0-9_]', '_', filename)
35 35
36 36 #-----------------------------------------------------------------------------
37 37 # Class declarations
38 38 #-----------------------------------------------------------------------------
39 39
40 40
41 41 class ConversionException(Exception):
42 42 pass
43 43
44 44
class DocStringInheritor(type):
    """
    Metaclass that copies docstrings from base-class methods onto the
    same-named functions defined in the class being created.

    For every plain function in the new class body, the direct bases are
    searched in order; the docstring of the first base attribute of the
    same name that has a non-empty ``__doc__`` is attached to the function
    (replacing any docstring the function already had).

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        namespace = {}
        for name, member in classDict.items():
            namespace[name] = member
            # Only plain functions (i.e. future methods) are candidates.
            if type(member) != FunctionType:
                continue
            for base in bases:
                if not hasattr(base, name):
                    continue
                inherited = getattr(base, name)
                if inherited.__doc__:
                    member.__doc__ = inherited.__doc__
                    # First documented base wins; stop searching.
                    break
        return type.__new__(meta, classname, bases, namespace)
73 73
74 74
class Converter(object):
    """Base class for notebook converters.

    ``render`` reads the notebook named by ``infile``, converts it by
    dispatching every cell to a ``render_<cell_type>`` method, and saves
    the result next to the input file with the class's ``extension``.
    Most ``render_*`` methods raise NotImplementedError here; concrete
    converters are expected to override them.
    """
    __metaclass__ = DocStringInheritor
    default_encoding = 'utf-8'
    extension = str()
    figures_counter = 0
    infile = str()
    infile_dir = str()
    infile_root = str()
    files_dir = str()
    with_preamble = True
    user_preamble = None
    output = unicode()
    raw_as_verbatim = False
    blank_symbol = " "
    # Syntax highlighting of code cells is on unless a caller disables it.
    # Kept as a class-level default so the attribute always exists.
    highlight = True
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']

    def __init__(self, infile, highlight=True):
        """Set up paths derived from ``infile``.

        Parameters
        ----------
        infile : str
            Path of the notebook file to convert.
        highlight : bool, optional
            Whether code should be syntax highlighted. Defaults to True so
            that callers passing only ``infile`` keep working.

        Creates the ``<clean name>_files`` directory beside the input file
        if it does not exist yet.
        """
        self.infile = infile
        self.highlight = highlight
        self.infile_dir, infile_root = os.path.split(infile)
        infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(infile_root)
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.infile_root = infile_root
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, infile_root)

    def __del__(self):
        # Remove the figures directory again if nothing was written to it.
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        """Return the cell's prompt number, or ``blank_symbol`` if absent."""
        return cell.prompt_number if hasattr(cell, 'prompt_number') \
            else self.blank_symbol

    def dispatch(self, cell_type):
        """return cell_type dependent render method, for example render_code

        Falls back to ``render_unknown`` when no matching method exists.
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        return output_type dependent render method, for example
        render_display_format_text

        Falls back to ``render_unknown_display`` when no matching method
        exists.
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to convert a notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        the Converter class do not need to re-implement this method, but just
        need implementations for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
            Character or string to join cells with. Default is "\n"

        Returns
        -------
        out : string
            Header, body and footer lines joined by newlines.
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        """Convert every cell of every worksheet of ``self.nb``.

        Returns the converted cells, joined by ``cell_separator``, as a
        list of lines.
        """
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read (if not already done), convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert isinstance(self.output, unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        """Write ``self.output`` to disk and return the absolute path.

        Parameters
        ----------
        outfile : str, optional
            Destination path. Defaults to ``outbase + '.' + extension``.
        encoding : str, optional
            Text encoding. Defaults to ``default_encoding``.
        """
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        The file is written inside ``files_dir`` and named from
        ``clean_name`` and a running counter.

        Returns the path of the new file (``files_dir`` joined with the
        figure name).
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        The first format of ``display_data_priority`` present in the
        output is used; otherwise the first key that is not
        'output_type'. Raises RuntimeError if there is none.

        Returns list.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf']:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unknown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s', cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type=None):
        """Render outputs of unknown display format

        ``type`` is not used; it is optional because this method is the
        fallback of ``dispatch_display_format``, whose result
        ``render_display_data`` calls with the output alone.

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s', output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
            The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError
@@ -1,185 +1,185 b''
1 1 from __future__ import absolute_import
2 2
3 3 from converters.base import Converter
4 4 from converters.utils import text_cell, output_container
5 5 from converters.utils import highlight, coalesce_streams, ansi2html
6 6
7 7 from IPython.utils import path
8 8 from markdown import markdown
9 9 import os
10 10 import io
11 11
12 12
class ConverterHTML(Converter):
    """Converter that renders a notebook as a standalone HTML page."""
    extension = 'html'
    blank_symbol = ' '

    def in_tag(self, tag, src, attrs=None):
        """Return a list of elements bracketed by the given tag

        ``attrs`` maps attribute names to values; values are inserted
        verbatim, so callers supply their own quotes.
        """
        attr_s = '' if attrs is None else \
            ' '.join("%s=%s" % (attr, value)
                     for attr, value in attrs.items())
        return ['<%s %s>' % (tag, attr_s), src, '</%s>' % tag]

    def _ansi_colored(self, text):
        """Return ``text`` passed through ansi2html inside a <pre> element."""
        return ['<pre>%s</pre>' % ansi2html(text)]

    def _stylesheet(self, fname):
        """Return the content of CSS file ``fname`` wrapped in a <style> tag."""
        with io.open(fname, encoding='utf-8') as f:
            s = f.read()
        return self.in_tag('style', s, dict(type='"text/css"'))

    def _out_prompt(self, output):
        """Return the output prompt div; only 'pyout' outputs get a label."""
        if output.output_type == 'pyout':
            content = 'Out[%s]:' % self._get_prompt_number(output)
        else:
            content = ''
        return ['<div class="prompt output_prompt">%s</div>' % content]

    def header_body(self):
        """Return the body of the header as a list of strings."""

        from pygments.formatters import HtmlFormatter

        header = []
        static = os.path.join(path.get_ipython_package_dir(),
                              'frontend', 'html', 'notebook', 'static',
                              )
        here = os.path.split(os.path.realpath(__file__))[0]
        css = os.path.join(static, 'css')
        for sheet in [
            # do we need jquery and prettify?
            # os.path.join(static, 'jquery', 'css', 'themes', 'base',
            # 'jquery-ui.min.css'),
            # os.path.join(static, 'prettify', 'prettify.css'),
            os.path.join(css, 'boilerplate.css'),
            os.path.join(css, 'fbm.css'),
            os.path.join(css, 'notebook.css'),
            os.path.join(css, 'renderedhtml.css'),
            # our overrides:
            os.path.join(here, '..', 'css', 'static_html.css'),
        ]:
            header.extend(self._stylesheet(sheet))

        # pygments css
        pygments_css = HtmlFormatter().get_style_defs('.highlight')
        header.extend(['<meta charset="UTF-8">'])
        header.extend(self.in_tag('style', pygments_css,
                                  dict(type='"text/css"')))

        # TODO: this should be allowed to use local mathjax:
        header.extend(self.in_tag('script', '', {'type': '"text/javascript"',
            'src': '"https://c328740.ssl.cf1.rackcdn.com/mathjax/'
                   'latest/MathJax.js?config=TeX-AMS_HTML"',
        }))
        with io.open(os.path.join(here, '..', 'js', 'initmathjax.js'),
                     encoding='utf-8') as f:
            header.extend(self.in_tag('script', f.read(),
                                      {'type': '"text/javascript"'}))
        return header

    def optional_header(self):
        return ['<html>', '<head>'] + self.header_body() + \
            ['</head>', '<body>']

    def optional_footer(self):
        return ['</body>', '</html>']

    @text_cell
    def render_heading(self, cell):
        marker = cell.level
        return [u'<h{1}>\n {0}\n</h{1}>'.format(cell.source, marker)]

    def render_code(self, cell):
        # Cells without input produce no HTML at all.
        if not cell.input:
            return []

        lines = ['<div class="cell border-box-sizing code_cell vbox">']

        lines.append('<div class="input hbox">')
        n = self._get_prompt_number(cell)
        lines.append(
            '<div class="prompt input_prompt">In&nbsp;[%s]:</div>' % n
        )
        lines.append('<div class="input_area box-flex1">')
        if self.highlight:
            lines.append(highlight(cell.input))
        else:
            # Plain output: the source is inserted into HTML as-is, so it
            # must be escaped ('<', '>', '&') and wrapped in <pre> to keep
            # its line breaks and indentation.
            from xml.sax.saxutils import escape
            lines.append('<pre>%s</pre>' % escape(cell.input))
        lines.append('</div>')  # input_area
        lines.append('</div>')  # input

        if cell.outputs:
            lines.append('<div class="vbox output_wrapper">')
            lines.append('<div class="output vbox">')

            for output in coalesce_streams(cell.outputs):
                conv_fn = self.dispatch(output.output_type)
                lines.extend(conv_fn(output))

            lines.append('</div>')  # output
            lines.append('</div>')  # output_wrapper

        lines.append('</div>')  # cell

        return lines

    @text_cell
    def render_markdown(self, cell):
        return [markdown(cell.source)]

    def render_raw(self, cell):
        if self.raw_as_verbatim:
            return self.in_tag('pre', cell.source)
        else:
            return [cell.source]

    @output_container
    def render_pyout(self, output):
        # Use the first of these formats that the output provides.
        for fmt in ['html', 'latex', 'png', 'jpeg', 'svg', 'text']:
            if fmt in output:
                conv_fn = self.dispatch_display_format(fmt)
                return conv_fn(output)
        return []

    # Display data is rendered exactly like pyout.
    render_display_data = render_pyout

    @output_container
    def render_stream(self, output):
        return self._ansi_colored(output.text)

    @output_container
    def render_pyerr(self, output):
        # Note: a traceback is a *list* of frames.
        # lines = []

        # stb =
        return self._ansi_colored('\n'.join(output.traceback))

    def _img_lines(self, img_file):
        return ['<img src="%s">' % img_file, '</img>']

    def _unknown_lines(self, data):
        return ['<h2>Warning:: Unknown cell</h2>'] + self.in_tag('pre', data)

    def render_display_format_png(self, output):
        return ['<img src="data:image/png;base64,%s"></img>' % output.png]

    def render_display_format_svg(self, output):
        return [output.svg]

    def render_display_format_jpeg(self, output):
        return ['<img src="data:image/jpeg;base64,%s"></img>' % output.jpeg]

    def render_display_format_text(self, output):
        return self._ansi_colored(output.text)

    def render_display_format_html(self, output):
        return [output.html]

    def render_display_format_latex(self, output):
        return [output.latex]

    def render_display_format_json(self, output):
        # html ignores json
        return []

    def render_display_format_javascript(self, output):
        return [output.javascript]
@@ -1,85 +1,86 b''
1 1 #!/usr/bin/env python
2 2 """Convert IPython notebooks to other formats, such as ReST, and HTML.
3 3
4 4 Example:
5 5 ./nbconvert.py --format rst file.ipynb
6 6
7 7 Produces 'file.rst', along with auto-generated figure files
8 8 called nb_figure_NN.png.
9 9 """
10 10 #-----------------------------------------------------------------------------
11 11 # Imports
12 12 #-----------------------------------------------------------------------------
13 13 from __future__ import print_function
14 14
15 15 # From IPython
16 16 from IPython.external import argparse
17 17
18 18 # local
19 19 from converters.html import ConverterHTML
20 20 from converters.markdown import ConverterMarkdown
21 21 from converters.bloggerhtml import ConverterBloggerHTML
22 22 from converters.rst import ConverterRST
23 23 from converters.latex import ConverterLaTeX
24 24 from converters.python import ConverterPy
25 25
26 26
27 27 # When adding a new format, make sure to add it to the `converters`
28 28 # dictionary below. This is used to create the list of known formats,
29 29 # which gets printed in case an unknown format is encountered, as well
30 30 # as in the help
31 31
converters = {
    'rst': ConverterRST,
    'markdown': ConverterMarkdown,
    'html': ConverterHTML,
    'blogger-html': ConverterBloggerHTML,
    'latex': ConverterLaTeX,
    'py': ConverterPy,
}

default_format = 'rst'

# Build the comma-separated list of known formats, tagging the entry that
# equals `default_format` with " (default)".
known_formats = ', '.join(
    key + " (default)" if key == default_format else key
    for key in converters)
46 46
47 47
def main(infile, highlight=True, format='rst', preamble=None, exclude=None):
    """Convert a notebook to the requested format in one step.

    Parameters
    ----------
    infile : str
        Path of the notebook to convert.
    highlight : bool, optional
        Passed to the converter; False requests output without syntax
        highlighting. Defaults to True so callers that do not know about
        the option keep their previous behaviour.
    format : str, optional
        Key into the `converters` dictionary selecting the converter class.
    preamble, exclude : optional
        Accepted for the command-line interface; not used by this
        function at present.

    Raises
    ------
    SystemExit
        If `format` is not a key of `converters`.
    """
    try:
        ConverterClass = converters[format]
    except KeyError:
        raise SystemExit("Unknown format '%s', " % format +
                         "known formats are: " + known_formats)

    converter = ConverterClass(infile, highlight)
    converter.render()
58 58
59 59 #-----------------------------------------------------------------------------
60 60 # Script main
61 61 #-----------------------------------------------------------------------------
62 62
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__,
            formatter_class=argparse.RawTextHelpFormatter)
    # TODO: consider passing file like object around, rather than filenames
    # would allow us to process stdin, or even http streams
    #parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
    #                    default=sys.stdin)

    #Require a filename as a positional argument
    parser.add_argument('infile', nargs=1)
    parser.add_argument('-f', '--format', default=default_format,
                        help='Output format. Supported formats: \n' +
                        known_formats)
    parser.add_argument('-p', '--preamble',
                        help='Path to a user-specified preamble file')
    parser.add_argument('-e', '--exclude', default='',
                        help='Comma-separated list of cells to exclude')
    # '-p' already belongs to --preamble; registering it a second time makes
    # argparse fail with a conflicting-option error before any parsing
    # happens, so the plain-output switch uses '-P' instead.
    # store_false: the flag turns highlighting OFF, it is ON by default.
    parser.add_argument('-P', '--plain_output', dest='highlight',
                        action='store_false',
                        help='Plain output which will contain no syntax '
                             'highlighting.')
    args = parser.parse_args()
    # Drop empty entries so that an absent --exclude yields [] and not [''].
    exclude_cells = [s.strip() for s in args.exclude.split(',') if s.strip()]

    main(infile=args.infile[0], highlight=args.highlight,
         format=args.format, preamble=args.preamble, exclude=exclude_cells)
General Comments 0
You need to be logged in to leave comments. Login now