##// END OF EJS Templates
New new button
New new button

File last commit:

r18596:2d590459
r19694:5aec2c25
Show More
strings.py
221 lines | 5.6 KiB | text/x-python | PythonLexer
MinRK
add html_text and add_anchor filters...
r11302 # coding: utf-8
Jonathan Frederic
Cleanup and refactor of filters
r10676 """String filters.
Jonathan Frederic
Moved wrap code into Strings utility file.
r10433
Jonathan Frederic
Cleanup and refactor of filters
r10676 Contains a collection of useful string manipulation filters for use in Jinja
templates.
Jonathan Frederic
Moved wrap code into Strings utility file.
r10433 """
MinRK
remove heading cells in v4
r18596 # Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
Jonathan Frederic
Moved wrap code into Strings utility file.
r10433
MinRK
add posix_path filter...
r11972 import os
MinRK
fix `file/` URL replacements in nbconvert
r11202 import re
Jonathan Frederic
Cleanup and refactor of filters
r10676 import textwrap
Thomas Kluyver
Fixes for nbconvert under Python 3
r13397 try:
from urllib.parse import quote # Py 3
except ImportError:
from urllib2 import quote # Py 2
MinRK
add html_text and add_anchor filters...
r11302 from xml.etree import ElementTree
MinRK
convert IPython syntax to Python syntax in nbconvert python template...
r11711
from IPython.core.interactiveshell import InteractiveShell
MinRK
add html_text and add_anchor filters...
r11302 from IPython.utils import py3compat
Brian E. Granger
Fixing import for nbconvert.
r11089
Jonathan Frederic
Cleanup and refactor of filters
r10676
Brian E. Granger
Fixing import logic.
# Public filter names exported by this module.
__all__ = [
    'wrap_text',
    'html2text',
    'add_anchor',
    'strip_dollars',
    'strip_files_prefix',
    'comment_lines',
    'get_lines',
    'ipython2python',
    'posix_path',
    'path2url',
    'add_prompts',
    'ascii_only',
    'prevent_list_blocks',
]
Jonathan Frederic
Filter names cleanup
def wrap_text(text, width=100):
    """
    Re-flow text so that lines fit within a given width.

    Every line of the input is wrapped on its own with ``textwrap.wrap``,
    so line breaks already present in the text are kept.

    Parameters
    ----------
    text : str
        Text to wrap.
    width : int, optional
        Number of characters to wrap to, default 100.
    """
    wrapped_lines = []
    for line in text.split('\n'):
        # A blank line wraps to an empty list, which joins back to ''.
        wrapped_lines.append('\n'.join(textwrap.wrap(line, width)))
    return '\n'.join(wrapped_lines)
MinRK
add html_text and add_anchor filters...
r11302
Jonathan Frederic
Filter names cleanup
def html2text(element):
    """Extract the inner text of an html element.

    Analog of jQuery's $(element).text()

    `element` may be an ElementTree element or a string of markup.
    A string that fails to parse is returned unmodified.
    """
    if isinstance(element, py3compat.string_types):
        try:
            element = ElementTree.fromstring(element)
        except Exception:
            # failed to parse, just return it unmodified
            return element

    # The element's own leading text, then the text of each child
    # (gathered recursively), then the element's tail text.
    pieces = [element.text or ""]
    pieces.extend(html2text(child) for child in element)
    pieces.append(element.tail or "")
    return "".join(pieces)
def add_anchor(html):
    """Add an anchor-link to an html header

    For use on markdown headings.

    The header's text, with spaces replaced by dashes, is set as its ``id``
    and an ``<a class="anchor-link">`` child pointing at that id is appended.
    Input that cannot be parsed is returned unmodified.
    """
    try:
        header = ElementTree.fromstring(
            py3compat.cast_bytes_py2(html, encoding='utf-8'))
    except Exception:
        # failed to parse, just return it unmodified
        return html

    anchor_id = html2text(header).replace(' ', '-')
    header.set('id', anchor_id)
    # SubElement creates the <a> element and appends it to the header.
    anchor = ElementTree.SubElement(
        header, "a", {"class" : "anchor-link", "href" : "#" + anchor_id})
    anchor.text = u'¶'

    # Known issue of Python3.x, ElementTree.tostring() returns a byte string
    # instead of a text string.  See issue http://bugs.python.org/issue10942
    # Workaround is to make sure the bytes are cast to a string.
    serialized = ElementTree.tostring(header)
    return py3compat.decode(serialized, 'utf-8')
MinRK
allow extra pandoc args
r11293
Jonathan Frederic
Moved more code to Strings utilities file
r10434
Jonathan Frederic
Moved add_prompts into correct location.
def add_prompts(code, first='>>> ', cont='... '):
    """Add prompts to code snippets

    The first line of `code` is prefixed with `first`; every following
    line is prefixed with `cont`.
    """
    lines = code.split('\n')
    prompted = [first + lines[0]]
    prompted.extend(cont + line for line in lines[1:])
    return '\n'.join(prompted)
Jonathan Frederic
Moved more code to Strings utilities file
def strip_dollars(text):
    """
    Strip leading and trailing dollar symbols from text.

    Only dollar signs at the two ends of the string are removed; any
    dollar sign in the interior of the text is left in place.

    Parameters
    ----------
    text : str
        Text to remove the surrounding dollars from.

    Returns
    -------
    str
        `text` without its leading and trailing ``$`` characters.
    """
    return text.strip('$')
jakobgager
Small latex mods: Escapes, Headings, Equations...
r10882
MinRK
update patterns for strip_files filter...
# html attributes such as src="files/..." or href='/files/...'
files_url_pattern = re.compile(r'(src|href)\=([\'"]?)/?files/')
# markdown links and images such as [caption](files/...) or ![caption](/files/...)
markdown_url_pattern = re.compile(r'(!?)\[(?P<caption>.*?)\]\(/?files/(?P<location>.*?)\)')


def strip_files_prefix(text):
    """
    Fix all fake URLs that start with `files/`, stripping out the `files/` prefix.
    Applies to both urls (for html) and relative paths (for markdown paths).

    Parameters
    ----------
    text : str
        Text in which to replace 'src="files/real...' with 'src="real...'
    """
    # First the html attributes, then the markdown links in the result.
    without_html_prefix = files_url_pattern.sub(r"\1=\2", text)
    return markdown_url_pattern.sub(r'\1[\2](\3)', without_html_prefix)
Jonathan Frederic
Post code-review, extended refactor.
r10485
Jonathan Frederic
Filter names cleanup
def comment_lines(text, prefix='# '):
    """
    Comment out text by putting a prefix in front of every line.

    Parameters
    ----------
    text : str
        Text to comment out.
    prefix : str
        String to prepend to the start of each line.
    """
    # Prefix each line individually, the first one included, then
    # restore the line breaks.
    return '\n'.join(prefix + line for line in text.split('\n'))
Jonathan Frederic
Cleanup and refactor of filters
r10676
Jonathan Frederic
Post code-review, extended refactor.
r10485
Jonathan Frederic
Cleanup and refactor of filters
def get_lines(text, start=None, end=None):
    """
    Return a selection of the lines of the input text.

    The text is split on newlines, sliced as ``lines[start:end]`` and the
    selected lines are joined with newlines again.

    Parameters
    ----------
    text : str
        Text to parse lines from.
    start : int, optional
        Index of the first line to grab.
    end : int, optional
        Index at which to stop; the line at this index is not included.
    """
    window = slice(start, end)
    selected = text.split("\n")[window]
    return "\n".join(selected)
MinRK
convert IPython syntax to Python syntax in nbconvert python template...
r11711
def ipython2python(code):
    """Transform IPython syntax to pure Python syntax

    Parameters
    ----------
    code : str
        IPython code, to be transformed to pure Python
    """
    # The transformation is delegated to the input transformer manager of
    # the InteractiveShell instance.
    manager = InteractiveShell.instance().input_transformer_manager
    return manager.transform_cell(code)
MinRK
add posix_path filter...
r11972
def posix_path(path):
    """Turn a path into posix-style path/to/etc

    Mainly for use in latex on Windows,
    where native Windows paths are not allowed.
    """
    native_sep = os.path.sep
    if native_sep == '/':
        # Already posix-style separators: nothing to convert.
        return path
    return path.replace(native_sep, '/')
MinRK
fix markdown images...
r12451
def path2url(path):
    """Turn a file path into a URL

    The path is split on the native separator, every component is
    url-quoted, and the components are joined with '/'.
    """
    quoted_parts = [quote(part) for part in path.split(os.path.sep)]
    return '/'.join(quoted_parts)
MinRK
add ascii_only filter
r15434
def ascii_only(s):
    """ensure a string is ascii

    Characters that cannot be encoded as ascii are substituted by the
    'replace' error handler of the codec.
    """
    as_text = py3compat.cast_unicode(s)
    ascii_bytes = as_text.encode('ascii', 'replace')
    return ascii_bytes.decode('ascii')
def prevent_list_blocks(s):
    """
    Prevent presence of enumerate or itemize blocks in latex headings cells

    A list marker at the very start of the string (optionally preceded by
    whitespace) is escaped with a backslash: the dot of a leading number
    such as ``1.``, or a leading ``-``, ``+`` or ``*`` bullet.  Only the
    start of the string is examined, not the start of every line.

    Parameters
    ----------
    s : str
        Heading text to protect.

    Returns
    -------
    str
        The text with a leading list marker, if any, backslash-escaped.
    """
    # Raw strings keep the regex escapes valid Python string literals, and
    # the doubled backslash in each template emits one literal backslash.
    out = re.sub(r'^(\s*\d*)\.', r'\1\\.', s)
    # The three bullet characters are escaped the same way, so one character
    # class covers them; once a marker is escaped nothing else can match.
    out = re.sub(r'^(\s*)([-+*])', r'\1\\\2', out)
    return out