strings.py
220 lines
| 5.8 KiB
| text/x-python
|
PythonLexer
MinRK
|
r11302 | # coding: utf-8 | |
Jonathan Frederic
|
r10676 | """String filters. | |
Jonathan Frederic
|
r10433 | ||
Jonathan Frederic
|
r10676 | Contains a collection of useful string manipulation filters for use in Jinja | |
templates. | |||
Jonathan Frederic
|
r10433 | """ | |
#----------------------------------------------------------------------------- | |||
# Copyright (c) 2013, the IPython Development Team. | |||
# | |||
# Distributed under the terms of the Modified BSD License. | |||
# | |||
# The full license is in the file COPYING.txt, distributed with this software. | |||
#----------------------------------------------------------------------------- | |||
#----------------------------------------------------------------------------- | |||
# Imports | |||
#----------------------------------------------------------------------------- | |||
MinRK
|
r11972 | import os | |
MinRK
|
r11202 | import re | |
Jonathan Frederic
|
r10676 | import textwrap | |
Thomas Kluyver
|
r13397 | try: | |
from urllib.parse import quote # Py 3 | |||
except ImportError: | |||
from urllib2 import quote # Py 2 | |||
MinRK
|
r11302 | from xml.etree import ElementTree | |
MinRK
|
r11711 | ||
from IPython.core.interactiveshell import InteractiveShell | |||
MinRK
|
r11302 | from IPython.utils import py3compat | |
Brian E. Granger
|
r11089 | ||
Jonathan Frederic
|
r10433 | #----------------------------------------------------------------------------- | |
# Functions | |||
#----------------------------------------------------------------------------- | |||
Jonathan Frederic
|
r10676 | ||
Brian E. Granger
|
# Names exported by this module (the string filters defined below).
__all__ = [
    'wrap_text',
    'html2text',
    'add_anchor',
    'strip_dollars',
    'strip_files_prefix',
    'comment_lines',
    'get_lines',
    'ipython2python',
    'posix_path',
    'path2url',
    'add_prompts',
    'ascii_only',
]
Jonathan Frederic
|
def wrap_text(text, width=100):
    """
    Intelligently wrap text.

    Each input line is wrapped on its own, without breaking words if
    possible, and the results are joined back together with newlines.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.

    Returns
    -------
    str
        The wrapped text.
    """
    # textwrap.wrap() returns a list of lines (empty for a blank line), so
    # a blank input line comes back as an empty string and is preserved.
    wrapped_lines = ['\n'.join(textwrap.wrap(line, width))
                     for line in text.split('\n')]
    return '\n'.join(wrapped_lines)
MinRK
|
r11302 | ||
Jonathan Frederic
|
def html2text(element):
    """extract inner text from html

    Analog of jQuery's $(element).text()

    Parameters
    ----------
    element : str or xml.etree.ElementTree.Element
        Either markup to parse or an already-parsed element.

    Returns
    -------
    str
        The element's own text, followed by the text of every descendant,
        followed by the element's tail text.  If `element` is a string that
        cannot be parsed, it is returned unmodified.
    """
    if isinstance(element, py3compat.string_types):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # failed to parse, just return it unmodified
            return element

    # Gather the pieces in document order and join once at the end.
    pieces = [element.text or ""]
    pieces.extend(html2text(child) for child in element)
    pieces.append(element.tail or "")
    return "".join(pieces)
def add_anchor(html):
    """Add an anchor-link to an html header tag

    For use in heading cells

    Parameters
    ----------
    html : str
        Markup of a single header element.

    Returns
    -------
    str
        The header with an ``id`` attribute set and an
        ``<a class="anchor-link">`` child appended.  If `html` cannot be
        parsed, it is returned unmodified.
    """
    try:
        header = ElementTree.fromstring(
            py3compat.cast_bytes_py2(html, encoding='utf-8'))
    except Exception:
        # failed to parse, just return it unmodified
        return html

    # The anchor id is the header's inner text with spaces turned into dashes.
    link = html2text(header).replace(' ', '-')
    header.set('id', link)

    anchor = ElementTree.Element(
        "a", {"class": "anchor-link", "href": "#" + link})
    anchor.text = u'¶'
    header.append(anchor)

    # Known issue of Python3.x, ElementTree.tostring() returns a byte string
    # instead of a text string.  See issue http://bugs.python.org/issue10942
    # Workaround is to make sure the bytes are cast to a string.
    return py3compat.decode(ElementTree.tostring(header), 'utf-8')
MinRK
|
r11293 | ||
Jonathan Frederic
|
r10434 | ||
Jonathan Frederic
|
def add_prompts(code, first='>>> ', cont='... '):
    """Add prompts to code snippets

    Parameters
    ----------
    code : str
        Source code, possibly spanning several lines.
    first : str, optional
        Prompt put in front of the first line, default ``'>>> '``.
    cont : str, optional
        Prompt put in front of every following line, default ``'... '``.

    Returns
    -------
    str
        The code with a prompt in front of every line.
    """
    # str.split always yields at least one item, so lines[0] is safe even
    # for an empty string.
    lines = code.split('\n')
    prompted = [first + lines[0]]
    prompted.extend(cont + line for line in lines[1:])
    return '\n'.join(prompted)
Jonathan Frederic
|
def strip_dollars(text):
    """
    Strip dollar symbols from both ends of text.

    Only leading and trailing ``$`` characters are removed; dollar signs in
    the interior of the text are left untouched.

    Parameters
    ----------
    text : str
        Text to remove dollars from

    Returns
    -------
    str
        The text without its surrounding dollar symbols.
    """
    return text.strip('$')
jakobgager
|
r10882 | ||
MinRK
|
# Matches an html `src=`/`href=` attribute (optionally quoted) whose value
# starts with `files/` or `/files/`.
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# Matches a markdown link or image whose target starts with `files/` or
# `/files/`.
markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')


def strip_files_prefix(text):
    """
    Fix all fake URLs that start with `files/`, stripping out the `files/` prefix.
    Applies to both urls (for html) and relative paths (for markdown paths).

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'

    Returns
    -------
    str
        The text with the `files/` prefix removed from matching html
        attributes and markdown links.
    """
    # html: keep the attribute name and the opening quote, drop the prefix.
    cleaned_text = files_url_pattern.sub(r"\1=\2", text)
    # markdown: rebuild the link from its caption and prefix-free location.
    cleaned_text = markdown_url_pattern.sub(
        r'\1[\g<caption>](\g<location>)', cleaned_text)
    return cleaned_text
Jonathan Frederic
|
r10485 | ||
Jonathan Frederic
|
def comment_lines(text, prefix='# '):
    """
    Build a Python comment line from input text.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String to prepend to the start of each line.

    Returns
    -------
    str
        The text with `prefix` in front of every line.
    """
    # Replace line breaks with line breaks and comment symbols.
    # Also add a comment symbol at the beginning to comment out
    # the first line.
    return prefix + ('\n' + prefix).join(text.split('\n'))
Jonathan Frederic
|
r10676 | ||
Jonathan Frederic
|
r10485 | ||
Jonathan Frederic
|
def get_lines(text, start=None, end=None):
    """
    Split the input text into separate lines and then return the
    lines that the caller is interested in.

    `start` and `end` are used as slice bounds on the list of lines, so
    they are zero-based, `end` is exclusive, and negative values count
    from the end.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Index of the first line to grab.  Defaults to the first line.
    end : int, optional
        Index one past the last line to grab.  Defaults to the end of
        the text.

    Returns
    -------
    str
        The selected lines, joined by newlines.
    """
    # Split the input into lines.
    lines = text.split("\n")

    # Return the right lines, re-joined.
    return "\n".join(lines[start:end])
MinRK
|
r11711 | ||
def ipython2python(code):
    """Transform IPython syntax to pure Python syntax

    Parameters
    ----------
    code : str
        IPython code, to be transformed to pure Python

    Returns
    -------
    The result of running the shell's input transformer manager over
    `code` (presumably the transformed source as a string -- confirm
    against ``transform_cell``).
    """
    # The transformation is delegated to the InteractiveShell instance
    # returned by InteractiveShell.instance().
    shell = InteractiveShell.instance()
    return shell.input_transformer_manager.transform_cell(code)
MinRK
|
r11972 | ||
def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.

    Parameters
    ----------
    path : str
        Path using the platform's native separator.

    Returns
    -------
    str
        The path with every ``os.path.sep`` replaced by ``/``.  When the
        native separator already is ``/`` the path is returned unchanged.
    """
    if os.path.sep != '/':
        return path.replace(os.path.sep, '/')
    return path
MinRK
|
r12451 | ||
def path2url(path):
    """Turn a file path into a URL

    The path is split on the platform's path separator, every component is
    percent-quoted, and the components are joined with ``/``.

    Parameters
    ----------
    path : str
        File path using the platform's native separator.

    Returns
    -------
    str
        The quoted, ``/``-separated path.
    """
    parts = path.split(os.path.sep)
    return '/'.join(quote(part) for part in parts)
MinRK
|
r15434 | ||
def ascii_only(s):
    """ensure a string is ascii

    Parameters
    ----------
    s : str
        Text that may contain non-ascii characters.

    Returns
    -------
    str
        The text with every character that cannot be encoded as ascii
        replaced by ``?`` (the behaviour of the ``'replace'`` error handler).
    """
    s = py3compat.cast_unicode(s)
    # Round-trip through ascii bytes so the result is text again.
    return s.encode('ascii', 'replace').decode('ascii')