##// END OF EJS Templates
add possibility to preprocess ipynb files
add possibility to preprocess ipynb files

File last commit:

r8789:e0bdc672
r9184:b5e3545c
Show More
utils.py
366 lines | 10.5 KiB | text/x-python | PythonLexer
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 """A one-line description.
A longer description that spans multiple lines. Explain the purpose of the
file and provide a short list of the key classes/functions it contains. This
is the docstring shown when someone does 'import foo;foo?' in IPython, so it
should be reasonably useful and informative.
"""
#-----------------------------------------------------------------------------
# Copyright (c) 2012, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
Matthias BUSSONNIER
latex working
r8618 from __future__ import print_function
Matthias BUSSONNIER
more test fixed
r8623
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # Stdlib imports
Matthias BUSSONNIER
latex working
r8618 import subprocess
Matthias BUSSONNIER
more test fixed
r8623 import copy
import json
Matthias BUSSONNIER
latex working
r8618 import re
Matthias BUSSONNIER
add missing imports
r8629 import os
import sys
Matthias BUSSONNIER
more test fixed
r8623
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # IPython imports
Matthias BUSSONNIER
all seem to convert again
r8620 from IPython.utils.text import indent
David Warde-Farley
Unused imports.
r8748 from IPython.utils import py3compat
Matthias BUSSONNIER
more test fixed
r8623 from IPython.nbformat.v3.nbjson import BytesEncoder
Matthias BUSSONNIER
latex working
r8618
David Warde-Farley
Introduce standard structure from coding guidelines in converters/.
r8789 # Our own imports
from lexers import IPythonLexer
#-----------------------------------------------------------------------------
# Globals and constants
#-----------------------------------------------------------------------------
# Output keys whose string values split_lines_cell() turns into lists of lines.
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
latex working
r8618 #-----------------------------------------------------------------------------
# Utility functions
#-----------------------------------------------------------------------------
Matthias BUSSONNIER
all seem to convert again
def highlight(src, lang='ipython'):
    """
    Return a syntax-highlighted HTML rendering of the input source.

    Parameters
    ----------
    src : string
        Source code to highlight.
    lang : string
        'ipython' selects IPythonLexer; any other value is looked up with
        pygments' get_lexer_by_name (with stripall=True).

    Returns
    -------
    string
        Output of pygments.highlight using an HtmlFormatter.
    """
    # Use the qualified name ``pygments.highlight`` so that the pygments
    # function is not confused with this function of the same name.
    import pygments
    from pygments.formatters import HtmlFormatter
    from pygments.lexers import get_lexer_by_name

    lexer = (IPythonLexer() if lang == 'ipython'
             else get_lexer_by_name(lang, stripall=True))
    return pygments.highlight(src, lexer, HtmlFormatter())
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
latex working
def output_container(f):
    """Decorator: add a prompt-area next to a rendered output.

    ``f(self, output)`` must return a list of HTML lines.  The wrapper
    surrounds a non-empty result with an ``output_area`` div containing the
    lines of ``self._out_prompt(output)`` followed by an ``output_subarea``
    div holding the rendered lines.  A falsy result from ``f`` yields ``[]``.
    """
    import functools

    # functools.wraps keeps the decorated method's __name__ and __doc__.
    @functools.wraps(f)
    def wrapped(self, output):
        rendered = f(self, output)
        if not rendered:
            # empty output
            return []

        classes = "output_subarea output_%s" % output.output_type
        if 'html' in output.keys():
            classes += ' output_html rendered_html'
        if output.output_type == 'stream':
            # stream outputs also get a class naming the stream
            classes += " output_%s" % output.stream

        lines = ['<div class="hbox output_area">']
        lines.extend(self._out_prompt(output))
        lines.append('<div class="%s">' % classes)
        lines.extend(rendered)
        lines.append('</div>')  # subarea
        lines.append('</div>')  # output_area
        return lines

    return wrapped
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
latex working
def text_cell(f):
    """Decorator: wrap a rendered text cell in the appropriate div.

    ``f(self, cell)`` must return a list of HTML lines; the wrapper returns
    that list enclosed between an opening ``<div>`` carrying the text-cell
    CSS classes and a closing ``</div>``.
    """
    import functools

    # functools.wraps keeps the decorated method's __name__ and __doc__.
    @functools.wraps(f)
    def wrapped(self, cell):
        rendered = f(self, cell)
        classes = "text_cell_render border-box-sizing rendered_html"
        return ['<div class="%s">' % classes] + rendered + ['</div>']

    return wrapped
def remove_fake_files_url(cell):
    """Strip every '/files/' pseudo-path occurrence from ``cell.source``.

    The cell is modified in place; nothing is returned.
    """
    cell.source = cell.source.replace('/files/', '')
# ANSI color functions:
def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Matches ``ESC [ <digits and semicolons> m`` with any number of
    parameters (including none), e.g. ``ESC[0m``, ``ESC[31m``,
    ``ESC[1;31;40m`` and ``ESC[m``.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
    """
    return re.sub('\x1b' + r'\[[0-9;]*m', '', src)
def ansi2html(txt):
    """Render ANSI colors as HTML colors

    This is equivalent to util.fixConsole in utils.js

    HTML special characters in the input are escaped first; each ANSI
    ``ESC[...m`` sequence is then replaced by a ``<span>`` whose CSS classes
    come from the recognised codes.  A span is only considered open when one
    was actually emitted, so sequences without any recognised code never
    produce an unmatched ``</span>``.

    Parameters
    ----------
    txt : string

    Returns
    -------
    string
    """
    ansi_colormap = {
        '30': 'ansiblack',
        '31': 'ansired',
        '32': 'ansigreen',
        '33': 'ansiyellow',
        '34': 'ansiblue',
        '35': 'ansipurple',
        '36': 'ansicyan',
        '37': 'ansigrey',
        '01': 'ansibold',
    }

    # do ampersand first, so the '&' of the entities below is not re-escaped
    txt = txt.replace('&', '&amp;')
    html_escapes = {
        '<': '&lt;',
        '>': '&gt;',
        "'": '&apos;',
        '"': '&quot;',
        '`': '&#96;',
    }
    # .items() works on both Python 2 and 3 (unlike .iteritems())
    for c, escape in html_escapes.items():
        txt = txt.replace(c, escape)

    ansi_re = re.compile('\x1b' + r'\[([\dA-Fa-f;]*?)m')
    opened = False
    m = ansi_re.search(txt)
    while m:
        cmds = m.group(1).split(';')
        # close the span left open by the previous escape sequence, if any
        closer = '</span>' if opened else ''
        classes = [ansi_colormap[cmd] for cmd in cmds if cmd in ansi_colormap]
        if classes:
            opener = '<span class="%s">' % (' '.join(classes))
        else:
            opener = ''
        # Track whether a <span> was really written; resets and unknown
        # codes emit no opener and therefore leave nothing to close.
        opened = bool(classes)
        txt = ansi_re.sub(closer + opener, txt, count=1)
        m = ansi_re.search(txt)

    if opened:
        txt += '</span>'
    return txt
# Pandoc-dependent code
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed
    (subprocess.Popen cannot start the process).
    pandoc's stderr is not captured, so any error messages it generates go
    directly to this process's stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    p = subprocess.Popen('pandoc -f markdown -t latex'.split(),
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so communicate() returns None for it.
    out, _ = p.communicate(src.encode('utf-8'))
    # bytes.decode works on both Python 2 and 3 (unlike unicode()).
    return out.decode('utf-8')
Matthias BUSSONNIER
latex working
r8618
def markdown2rst(src):
    """Convert a markdown string to ReST via pandoc.

    This function will raise an error if pandoc is not installed
    (subprocess.Popen cannot start the process).
    pandoc's stderr is not captured, so any error messages it generates go
    directly to this process's stderr.

    Parameters
    ----------
    src : string
      Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
      Output as returned by pandoc.
    """
    p = subprocess.Popen('pandoc -f markdown -t rst'.split(),
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so communicate() returns None for it.
    out, _ = p.communicate(src.encode('utf-8'))
    # bytes.decode works on both Python 2 and 3 (unlike unicode()).
    return out.decode('utf-8')
Matthias BUSSONNIER
latex working
r8618
def rst_directive(directive, text=''):
    """
    Build a ReST directive block as a list of lines.

    The list starts with the directive line and an empty line; when `text`
    is non-empty, the indented text and another empty line follow.
    """
    block = [directive, '']
    if not text:
        return block
    block += [indent(text), '']
    return block
def coalesce_streams(outputs):
    """merge consecutive sequences of stream output into single stream

    to prevent extra newlines inserted at flush calls

    Only directly adjacent stream outputs with the same stream name are
    merged.  The text of a merged output is appended in place to the output
    that starts its run, so the objects in `outputs` are modified.

    Parameters
    ----------
    outputs : list
        Output objects exposing ``output_type`` and, for stream outputs,
        ``stream`` and ``text``.

    Returns
    -------
    list
        New list with each run of same-stream outputs collapsed into its
        first element; empty when `outputs` is empty.

    TODO: handle \\r deletion
    """
    if not outputs:
        return []

    last = outputs[0]
    new_outputs = [last]
    for output in outputs[1:]:
        if (output.output_type == 'stream' and
                last.output_type == 'stream' and
                last.stream == output.stream):
            last.text += output.text
        else:
            new_outputs.append(output)
            # Compare the next output against this one, not against the
            # very first output of the list.
            last = output

    return new_outputs
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    Runs rst2html with options chosen to produce very plain html, keeps only
    the lines found between the ``<body>`` and ``</body>`` lines, and writes
    them to a file named like `infile` but with a ``.html`` extension, so the
    result can be easily pasted into a blogger edit window.

    Returns the name of the written file.  Raises IOError with rst2html's
    stderr output if it wrote anything there.
    """
    # This is the template for the rst2html call that produces the cleanest,
    # simplest html I could find.  This should help in making it easier to
    # paste into the blogspot html window, though I'm still having problems
    # with linebreaks there...
    cmd_template = ("rst2html --link-stylesheet --no-xml-declaration "
                    "--no-generator --no-datestamp --no-source-link "
                    "--no-toc-backlinks --no-section-numbering "
                    "--strip-comments ")
    cmd = "%s %s" % (cmd_template, infile)

    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # A single shared iterator lets the second loop resume where the first
    # one stopped: together they form a trivial two-state machine.
    walker = iter(html.splitlines())

    # Skip everything up to and including the opening body line.
    # This may only work if there's a real title defined so we get a 'div
    # class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        # Copy lines until the closing body line is reached.
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
David Warde-Farley
PEP8-ify several files
r8747
Matthias BUSSONNIER
latex working
r8618 #-----------------------------------------------------------------------------
# Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions
# but at cell level instead of whole notebook level
#-----------------------------------------------------------------------------
def writes_cell(cell, **kwargs):
    """Serialize a single cell to a JSON string.

    The ``cls``, ``indent``, ``sort_keys`` and ``separators`` options passed
    to json.dumps are always set here, replacing any values supplied by the
    caller.  Unless ``split_lines=False`` is given, a deep copy of the cell
    is run through split_lines_cell before being dumped.

    Returns the result of py3compat.str_to_unicode on the dumped JSON.
    """
    kwargs.update(
        cls=BytesEncoder,
        indent=3,
        sort_keys=True,
        separators=(',', ': '),
    )
    # 'split_lines' is our own option; remove it before calling json.dumps.
    if kwargs.pop('split_lines', True):
        cell = split_lines_cell(copy.deepcopy(cell))
    serialized = json.dumps(cell, **kwargs)
    return py3compat.str_to_unicode(serialized, 'utf-8')
def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    String values are replaced in place by lists of lines: for code cells
    the ``input`` and every multiline key of each output, for other cells
    the ``source`` and ``rendered`` keys.  The same cell object is returned.
    """
    if cell.cell_type != 'code':
        # text, heading cell
        for key in ['source', 'rendered']:
            value = cell.get(key, None)
            if isinstance(value, basestring):
                cell[key] = (value + '\n').splitlines()
        return cell

    if 'input' in cell and isinstance(cell.input, basestring):
        cell.input = (cell.input + '\n').splitlines()
    for output in cell.outputs:
        for key in _multiline_outputs:
            value = output.get(key, None)
            if isinstance(value, basestring):
                output[key] = (value + '\n').splitlines()
    return cell
def cell_to_lines(cell):
    """
    Serialize a cell to JSON and return the text as a list of lines.

    The cell is first passed to split_lines_cell, which modifies it in
    place, and is then dumped with writes_cell.
    """
    split_lines_cell(cell)
    serialized = writes_cell(cell)
    return serialized.strip().split('\n')