utils.py
366 lines
| 10.5 KiB
| text/x-python
|
PythonLexer
/ converters / utils.py
David Warde-Farley
|
"""A one-line description.

A longer description that spans multiple lines. Explain the purpose of the
file and provide a short list of the key classes/functions it contains. This
is the docstring shown when someone does 'import foo;foo?' in IPython, so it
should be reasonably useful and informative.
"""
#----------------------------------------------------------------------------- | ||||
# Copyright (c) 2012, the IPython Development Team. | ||||
# | ||||
# Distributed under the terms of the Modified BSD License. | ||||
# | ||||
# The full license is in the file COPYING.txt, distributed with this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
# Imports | ||||
#----------------------------------------------------------------------------- | ||||
Matthias BUSSONNIER
|
r8618 | from __future__ import print_function | ||
Matthias BUSSONNIER
|
r8623 | |||
David Warde-Farley
|
r8789 | # Stdlib imports | ||
Matthias BUSSONNIER
|
r8618 | import subprocess | ||
Matthias BUSSONNIER
|
r8623 | import copy | ||
import json | ||||
Matthias BUSSONNIER
|
r8618 | import re | ||
Matthias BUSSONNIER
|
r8629 | import os | ||
import sys | ||||
Matthias BUSSONNIER
|
r8623 | |||
David Warde-Farley
|
r8789 | # IPython imports | ||
Matthias BUSSONNIER
|
r8620 | from IPython.utils.text import indent | ||
David Warde-Farley
|
r8748 | from IPython.utils import py3compat | ||
Matthias BUSSONNIER
|
r8623 | from IPython.nbformat.v3.nbjson import BytesEncoder | ||
Matthias BUSSONNIER
|
r8618 | |||
David Warde-Farley
|
r8789 | # Our own imports | ||
from lexers import IPythonLexer | ||||
#----------------------------------------------------------------------------- | ||||
# Globals and constants | ||||
#----------------------------------------------------------------------------- | ||||
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json'] | ||||
David Warde-Farley
|
r8747 | |||
Matthias BUSSONNIER
|
r8618 | #----------------------------------------------------------------------------- | ||
# Utility functions | ||||
#----------------------------------------------------------------------------- | ||||
Matthias BUSSONNIER
|
def highlight(src, lang='ipython'):
    """Return a syntax-highlighted HTML version of the input source.

    Parameters
    ----------
    src : string
        Source code to highlight.
    lang : string, optional
        ``'ipython'`` (the default) selects ``IPythonLexer``; any other value
        is looked up with pygments' ``get_lexer_by_name``.

    Returns
    -------
    string
        The result of pygments' ``highlight`` using an ``HtmlFormatter``.
    """
    # Function-scope imports: pygments is loaded when highlight() is called.
    # The pygments function is aliased so it does not shadow this function.
    from pygments import highlight as pygments_highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter

    if lang == 'ipython':
        lexer = IPythonLexer()
    else:
        lexer = get_lexer_by_name(lang, stripall=True)

    return pygments_highlight(src, lexer, HtmlFormatter())
David Warde-Farley
|
r8747 | |||
Matthias BUSSONNIER
|
def output_container(f):
    """Decorator: add a prompt-area next to a rendered output.

    Parameters
    ----------
    f : callable
        Method ``f(self, output)`` returning a list of rendered lines.

    Returns
    -------
    callable
        Wrapper with the same signature.  It returns ``[]`` when ``f``
        renders nothing; otherwise the rendered lines wrapped in an
        ``output_area`` div, preceded by ``self._out_prompt(output)``.
    """
    from functools import wraps

    @wraps(f)  # keep the wrapped method's name and docstring
    def wrapped(self, output):
        rendered = f(self, output)
        if not rendered:
            # empty output
            return []
        lines = []
        lines.append('<div class="hbox output_area">')
        lines.extend(self._out_prompt(output))
        classes = "output_subarea output_%s" % output.output_type
        if 'html' in output:
            classes += ' output_html rendered_html'
        if output.output_type == 'stream':
            classes += " output_%s" % output.stream
        lines.append('<div class="%s">' % classes)
        lines.extend(rendered)
        lines.append('</div>')  # subarea
        lines.append('</div>')  # output_area
        return lines

    return wrapped
David Warde-Farley
|
r8747 | |||
Matthias BUSSONNIER
|
def text_cell(f):
    """Decorator: wrap a rendered text cell in the appropriate div.

    Parameters
    ----------
    f : callable
        Method ``f(self, cell)`` returning a list of rendered lines.

    Returns
    -------
    callable
        Wrapper with the same signature returning the rendered lines
        enclosed in a ``text_cell_render`` div.
    """
    from functools import wraps

    @wraps(f)  # keep the wrapped method's name and docstring
    def wrapped(self, cell):
        rendered = f(self, cell)
        classes = "text_cell_render border-box-sizing rendered_html"
        lines = ['<div class="%s">' % classes] + rendered + ['</div>']
        return lines

    return wrapped
def remove_fake_files_url(cell):
    """Remove from the cell source the /files/ pseudo-path we use.

    Every occurrence of ``'/files/'`` in ``cell.source`` is removed.  The
    cell is modified in place and nothing is returned.

    Parameters
    ----------
    cell : object
        Any object with a string ``source`` attribute.
    """
    src = cell.source
    cell.source = src.replace('/files/', '')
# ANSI color functions: | ||||
def remove_ansi(src):
    """Strip all ANSI color escape sequences from input string.

    Parameters
    ----------
    src : string

    Returns
    -------
    string
        ``src`` with every ``ESC [ <digits and semicolons> m`` sequence
        removed.
    """
    # Match any number of ';'-separated numeric parameters (including none),
    # so that e.g. ESC[31m, ESC[m and ESC[1;31;40m are stripped as well as
    # the ESC[0m / ESC[0;31m forms.
    return re.sub(r'\033\[[\d;]*m', '', src)
def ansi2html(txt):
    """Render ANSI colors as HTML colors

    This is equivalent to util.fixConsole in utils.js

    The text is HTML-escaped first; each ANSI color sequence is then
    replaced by a ``<span>`` whose CSS classes come from the color codes.
    Codes that are not in the color map contribute no class.

    Parameters
    ----------
    txt : string

    Returns
    -------
    string
    """

    ansi_colormap = {
        '30': 'ansiblack',
        '31': 'ansired',
        '32': 'ansigreen',
        '33': 'ansiyellow',
        '34': 'ansiblue',
        '35': 'ansipurple',
        '36': 'ansicyan',
        '37': 'ansigrey',
        '01': 'ansibold',
    }

    # do ampersand first, so the entities inserted below are not re-escaped
    txt = txt.replace('&', '&amp;')
    html_escapes = {
        '<': '&lt;',
        '>': '&gt;',
        "'": '&apos;',
        '"': '&quot;',
        '`': '&#96;',
    }
    for c, escape in html_escapes.items():
        txt = txt.replace(c, escape)

    ansi_re = re.compile('\x1b' + r'\[([\dA-Fa-f;]*?)m')
    m = ansi_re.search(txt)
    opened = False
    while m:
        cmds = m.groups()[0].split(';')
        # Close the span opened by the previous sequence, if any.
        closer = '</span>' if opened else ''

        # True if there is more than one element in cmds, *or*
        # if there is only one but it is not equal to a string of zeroes.
        opened = len(cmds) > 1 or cmds[0] != '0' * len(cmds[0])
        classes = []
        for cmd in cmds:
            if cmd in ansi_colormap:
                classes.append(ansi_colormap.get(cmd))

        if classes:
            opener = '<span class="%s">' % (' '.join(classes))
        else:
            opener = ''
        # Replace only the first remaining sequence; later ones are handled
        # by subsequent loop iterations.
        txt = ansi_re.sub(closer + opener, txt, count=1)

        m = ansi_re.search(txt)

    if opened:
        txt += '</span>'
    return txt
# Pandoc-dependent code | ||||
def markdown2latex(src):
    """Convert a markdown string to LaTeX via pandoc.

    This function will raise an error if pandoc is not installed.

    pandoc's stderr is not captured: it is inherited from this process, so
    any error messages generated by pandoc appear directly on stderr.

    Parameters
    ----------
    src : string
        Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
        Output as returned by pandoc, decoded as UTF-8.
    """
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'latex'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so the second element returned by communicate()
    # is always None and is ignored.
    out, _ = p.communicate(src.encode('utf-8'))
    return out.decode('utf-8')
Matthias BUSSONNIER
|
r8618 | |||
def markdown2rst(src):
    """Convert a markdown string to reStructuredText via pandoc.

    This function will raise an error if pandoc is not installed.

    pandoc's stderr is not captured: it is inherited from this process, so
    any error messages generated by pandoc appear directly on stderr.

    Parameters
    ----------
    src : string
        Input string, assumed to be valid markdown.

    Returns
    -------
    out : string
        Output as returned by pandoc, decoded as UTF-8.
    """
    p = subprocess.Popen(['pandoc', '-f', 'markdown', '-t', 'rst'],
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # stderr is not piped, so the second element returned by communicate()
    # is always None and is ignored.
    out, _ = p.communicate(src.encode('utf-8'))
    return out.decode('utf-8')
Matthias BUSSONNIER
|
r8618 | |||
def rst_directive(directive, text=''):
    """
    Makes ReST directive block and indents any text passed to it.

    Parameters
    ----------
    directive : string
        The directive line, used as the first element of the result.
    text : string, optional
        Body of the directive; when non-empty it is passed through
        ``indent`` and appended, followed by an empty line.

    Returns
    -------
    list of strings
        ``[directive, '']``, plus ``[indent(text), '']`` when text is given.
    """
    out = [directive, '']
    if text:
        out.extend([indent(text), ''])
    return out
def coalesce_streams(outputs):
    """merge consecutive sequences of stream output into single stream

    to prevent extra newlines inserted at flush calls

    Only directly adjacent stream outputs with the same stream name are
    merged.  Merging appends to the ``text`` of the earlier output object
    in place.

    TODO: handle carriage-return deletion

    Parameters
    ----------
    outputs : list
        Output objects with an ``output_type`` attribute; stream outputs
        also have ``stream`` and ``text`` attributes.

    Returns
    -------
    list
        A new list holding the coalesced outputs (empty for empty input).
    """
    if not outputs:
        return []

    last = outputs[0]
    new_outputs = [last]
    for output in outputs[1:]:
        if (output.output_type == 'stream' and
                last.output_type == 'stream' and
                last.stream == output.stream):
            last.text += output.text
        else:
            new_outputs.append(output)
            # Track the most recently kept output so that only *consecutive*
            # stream outputs are merged.
            last = output

    return new_outputs
def rst2simplehtml(infile):
    """Convert a rst file to simplified html suitable for blogger.

    This just runs rst2html with certain parameters to produce really simple
    html and strips the document header, so the resulting file can be easily
    pasted into a blogger edit window.

    Parameters
    ----------
    infile : string
        Path of the rst file to convert.

    Returns
    -------
    string
        Name of the html file written: ``infile`` with its extension
        replaced by ``.html``.

    Raises
    ------
    IOError
        If rst2html writes anything to stderr.
    """
    # These are the rst2html options that produce the cleanest, simplest html
    # I could find.  This should help in making it easier to paste into the
    # blogspot html window, though I'm still having problems with linebreaks
    # there...
    #
    # The command is an argument list run without a shell, so a file name
    # containing spaces or shell metacharacters is passed through verbatim.
    cmd = ['rst2html', '--link-stylesheet', '--no-xml-declaration',
           '--no-generator', '--no-datestamp', '--no-source-link',
           '--no-toc-backlinks', '--no-section-numbering',
           '--strip-comments', infile]

    # universal_newlines=True makes the pipes text-mode, so the str
    # comparisons below also work on Python 3.
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    html, stderr = proc.communicate()
    if stderr:
        raise IOError(stderr)

    # Make an iterator so breaking out holds state.  Our implementation of
    # searching for the html body below is basically a trivial little state
    # machine, so we need this.
    walker = iter(html.splitlines())

    # Find start of main text, break out to then print until we find the
    # closing body tag.
    # This may only work if there's a real title defined so we get a 'div
    # class' tag, I haven't really tried.
    for line in walker:
        if line.startswith('<body>'):
            break

    newfname = os.path.splitext(infile)[0] + '.html'
    with open(newfname, 'w') as f:
        for line in walker:
            if line.startswith('</body>'):
                break
            f.write(line)
            f.write('\n')

    return newfname
David Warde-Farley
|
r8747 | |||
Matthias BUSSONNIER
|
r8618 | #----------------------------------------------------------------------------- | ||
# Cell-level functions -- similar to IPython.nbformat.v3.rwbase functions | ||||
# but at cell level instead of whole notebook level | ||||
#----------------------------------------------------------------------------- | ||||
def writes_cell(cell, **kwargs):
    """Serialize a single cell to a JSON string.

    Parameters
    ----------
    cell : cell object
        The cell to serialize.
    split_lines : bool, optional
        Keyword-only flag, default True.  When true the cell is deep-copied
        and the copy is passed through ``split_lines_cell`` before dumping,
        so the caller's cell is left untouched.
    **kwargs
        Remaining keywords are forwarded to ``json.dumps``.  ``cls``,
        ``indent``, ``sort_keys`` and ``separators`` are always overridden
        with the fixed values set below.

    Returns
    -------
    The JSON text, passed through ``py3compat.str_to_unicode``.
    """
    kwargs['cls'] = BytesEncoder
    kwargs['indent'] = 3
    kwargs['sort_keys'] = True
    kwargs['separators'] = (',', ': ')
    if kwargs.pop('split_lines', True):
        cell = split_lines_cell(copy.deepcopy(cell))
    return py3compat.str_to_unicode(json.dumps(cell, **kwargs), 'utf-8')
# ``basestring`` only exists on Python 2; fall back to ``str`` on Python 3.
try:
    _string_types = basestring
except NameError:
    _string_types = str


def split_lines_cell(cell):
    """
    Split lines within a cell as in
    IPython.nbformat.v3.rwbase.split_lines

    String fields are replaced, in place, by lists of lines without their
    line endings.  For code cells this covers ``input`` and, in each output,
    the keys listed in ``_multiline_outputs``; for any other cell type it
    covers ``source`` and ``rendered``.  Fields that are not strings (for
    example already split ones) are left alone.

    Parameters
    ----------
    cell : cell object
        Dict-like cell that also allows attribute access.

    Returns
    -------
    The same ``cell`` object, modified in place.
    """
    if cell.cell_type == 'code':
        if 'input' in cell and isinstance(cell.input, _string_types):
            cell.input = (cell.input + '\n').splitlines()
        for output in cell.outputs:
            for key in _multiline_outputs:
                item = output.get(key, None)
                if isinstance(item, _string_types):
                    output[key] = (item + '\n').splitlines()
    else:  # text, heading cell
        for key in ['source', 'rendered']:
            item = cell.get(key, None)
            if isinstance(item, _string_types):
                cell[key] = (item + '\n').splitlines()
    return cell
def cell_to_lines(cell):
    '''
    Write a cell to json, returning the split lines.

    Note that ``cell`` itself is passed to ``split_lines_cell``, which
    modifies it in place.
    '''
    split_lines_cell(cell)
    # The cell is already split above, so skip the deep copy and second
    # split that writes_cell() performs by default.
    s = writes_cell(cell, split_lines=False).strip()
    return s.split('\n')