##// END OF EJS Templates
Backport PR #2738: Unicode content crashes the pager (console)...
Backport PR #2738: Unicode content crashes the pager (console) We've run into an interesting bug in the astropy project. https://github.com/astropy/astropy/issues/600 When displaying a docstring that contains Unicode and is also long enough that it gets sent to the pager it fails since the docstring can't be sent to the pager as ascii. This crashes in the middle of sending content to the pager, so the shell ends up in an inconsistent state and stops echoing the keyboard etc. The fix (attached) is merely to encode the content sent to the pager in the same encoding as the terminal (`sys.stdout.encoding`). Strictly speaking, this isn't always the right thing to do, since the pager may be configured to expect a different encoding than the terminal, but that is sort of an irrational way to configure a machine... ;) For example, `less`, in the absence of any special environment variables to tell it otherwise, uses the standard `LC*` environment variables to determine what to do, which should be the same mechanism the terminal also uses by default. If anyone can suggest a better fix, I'm all for it. Perhaps it should be configurable, defaulting to `sys.stdout.encoding`?

File last commit:

r6054:e0112c89
r9853:7f9a133e
Show More
pygments_highlighter.py
224 lines | 8.1 KiB | text/x-python | PythonLexer
/ IPython / frontend / qt / console / pygments_highlighter.py
# System library imports.
from IPython.external.qt import QtGui
from pygments.formatters.html import HtmlFormatter
from pygments.lexer import RegexLexer, _TokenType, Text, Error
from pygments.lexers import PythonLexer
from pygments.styles import get_style_by_name
def get_tokens_unprocessed(self, text, stack=('root',)):
    """ Split ``text`` into (position, tokentype, text) triples.

    Monkeypatched to store the final stack on the object itself: when the
    lexer has a ``_saved_state_stack`` attribute, lexing resumes from a copy
    of it instead of ``stack``, and once the generator has run to completion
    the final state stack is written back to ``self._saved_state_stack``.

    Parameters
    ----------
    text : string to tokenize.
    stack : initial state stack, used only when the lexer carries no
        ``_saved_state_stack``.

    Yields
    ------
    (pos, tokentype, value) tuples, where ``pos`` is the index in ``text``
    at which ``value`` starts. Characters that no rule of the current state
    matches are yielded as ``Error`` tokens, except for a newline, which is
    yielded as ``Text`` and resets the state stack to ``['root']``.
    """
    pos = 0
    tokendefs = self._tokens
    # Resume from the stack left behind by a previous run, if there is one.
    if hasattr(self, '_saved_state_stack'):
        statestack = list(self._saved_state_stack)
    else:
        statestack = list(stack)
    statetokens = tokendefs[statestack[-1]]
    while True:
        for rexmatch, action, new_state in statetokens:
            m = rexmatch(text, pos)
            if not m:
                continue
            if type(action) is _TokenType:
                yield pos, action, m.group()
            else:
                # Callable action: it produces the tokens for this match.
                for item in action(self, m):
                    yield item
            pos = m.end()
            if new_state is not None:
                # state transition
                if isinstance(new_state, tuple):
                    for state in new_state:
                        if state == '#pop':
                            statestack.pop()
                        elif state == '#push':
                            statestack.append(statestack[-1])
                        else:
                            statestack.append(state)
                elif isinstance(new_state, int):
                    # pop: drop every state from index ``new_state`` onward
                    del statestack[new_state:]
                elif new_state == '#push':
                    statestack.append(statestack[-1])
                else:
                    assert False, "wrong state def: %r" % new_state
                statetokens = tokendefs[statestack[-1]]
            break
        else:
            # No rule of the current state matched at ``pos``.
            try:
                char = text[pos]
            except IndexError:
                # Ran off the end of the text: lexing is finished.
                break
            if char == '\n':
                # at EOL, reset state to "root"
                statestack = ['root']
                statetokens = tokendefs['root']
                # Yield before advancing so the token carries the position
                # of the newline itself, not of the character after it.
                yield pos, Text, u'\n'
            else:
                yield pos, Error, char
            pos += 1
    # Remember where we stopped so the next run can pick up from here.
    self._saved_state_stack = list(statestack)
# Monkeypatch! Replace RegexLexer.get_tokens_unprocessed with the module-level
# get_tokens_unprocessed so that lexers using it read and store their state
# stack in ``_saved_state_stack``. This takes effect as soon as this module is
# imported and applies to RegexLexer itself and to any subclass that does not
# define its own get_tokens_unprocessed.
RegexLexer.get_tokens_unprocessed = get_tokens_unprocessed
class PygmentsBlockUserData(QtGui.QTextBlockUserData):
    """ Storage for the user data associated with each line.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name. ``syntax_stack`` falls back to the class
    default below when it is not supplied.
    """

    # Lexer state stack used when no ``syntax_stack`` keyword is given.
    syntax_stack = ('root',)

    def __init__(self, **kwds):
        """ Store each keyword argument as an attribute, then initialize the
        Qt base class.
        """
        # ``items()`` works on both Python 2 and 3; ``iteritems()`` exists
        # only on Python 2.
        for key, value in kwds.items():
            setattr(self, key, value)
        QtGui.QTextBlockUserData.__init__(self)

    def __repr__(self):
        """ Return a constructor-style representation showing syntax_stack.
        """
        attrs = ['syntax_stack']
        kwds = ', '.join('%s=%r' % (attr, getattr(self, attr))
                         for attr in attrs)
        return 'PygmentsBlockUserData(%s)' % kwds
class PygmentsHighlighter(QtGui.QSyntaxHighlighter):
    """ Syntax highlighter that uses Pygments for parsing. """

    #---------------------------------------------------------------------------
    # 'QSyntaxHighlighter' interface
    #---------------------------------------------------------------------------

    def __init__(self, parent, lexer=None):
        """ Create a highlighter attached to ``parent``.

        Parameters
        ----------
        parent : passed unchanged to the QSyntaxHighlighter constructor.
        lexer : Pygments lexer to tokenize with; a new ``PythonLexer`` is
            created when this is omitted (or falsy).
        """
        super(PygmentsHighlighter, self).__init__(parent)

        # Scratch document: holds the CSS style sheet and renders formatter
        # HTML so that character formats can be read back from it.
        self._document = QtGui.QTextDocument()
        self._formatter = HtmlFormatter(nowrap=True)
        self._lexer = lexer if lexer else PythonLexer()
        self.set_style('default')

    def highlightBlock(self, string):
        """ Highlight a block of text.

        The lexer state saved on the previous block (if any) is restored
        before lexing ``string``, and the state reached at the end of
        ``string`` is stored on the current block for the next one.
        """
        prev_data = self.currentBlock().previous().userData()
        if prev_data is not None:
            self._lexer._saved_state_stack = prev_data.syntax_stack
        elif hasattr(self._lexer, '_saved_state_stack'):
            # No state to inherit: make sure a stale one is not picked up.
            del self._lexer._saved_state_stack

        # Lex the text using Pygments
        index = 0
        for token, text in self._lexer.get_tokens(string):
            length = len(text)
            self.setFormat(index, length, self._get_format(token))
            index += length

        if hasattr(self._lexer, '_saved_state_stack'):
            data = PygmentsBlockUserData(
                syntax_stack=self._lexer._saved_state_stack)
            self.currentBlock().setUserData(data)
            # Clean up for the next go-round.
            del self._lexer._saved_state_stack

    #---------------------------------------------------------------------------
    # 'PygmentsHighlighter' interface
    #---------------------------------------------------------------------------

    def set_style(self, style):
        """ Sets the style to the specified Pygments style.

        ``style`` may be a style name, which is looked up with
        ``get_style_by_name``, or any other object, which is used as the
        style directly.
        """
        # (str, type(u'')) is (str, unicode) on Python 2 and (str, str) on
        # Python 3, so this accepts the same strings ``basestring`` did
        # without depending on that Python 2-only name.
        if isinstance(style, (str, type(u''))):
            style = get_style_by_name(style)
        self._style = style
        self._clear_caches()

    def set_style_sheet(self, stylesheet):
        """ Sets a CSS stylesheet. The classes in the stylesheet should
        correspond to those generated by:

            pygmentize -S <style> -f html

        Note that 'set_style' and 'set_style_sheet' completely override each
        other, i.e. they cannot be used in conjunction.
        """
        self._document.setDefaultStyleSheet(stylesheet)
        # A style of None makes _get_format read formats from the document.
        self._style = None
        self._clear_caches()

    #---------------------------------------------------------------------------
    # Protected interface
    #---------------------------------------------------------------------------

    def _clear_caches(self):
        """ Clear caches for brushes and formats.
        """
        self._brushes = {}
        self._formats = {}

    def _get_format(self, token):
        """ Returns a QTextCharFormat for token or None.

        Results are cached per token until the style or style sheet changes.
        """
        if token in self._formats:
            return self._formats[token]

        if self._style is None:
            result = self._get_format_from_document(token, self._document)
        else:
            result = self._get_format_from_style(token, self._style)

        self._formats[token] = result
        return result

    def _get_format_from_document(self, token, document):
        """ Returns a QTextCharFormat for token by rendering a dummy string
        with the HTML formatter into ``document`` and reading back the
        resulting character format.

        Note that this replaces the HTML content of ``document``.
        """
        # ``next(...)`` works on Python 2.6+ and 3; the ``.next()`` method
        # exists only on Python 2 generators.
        _code, html = next(self._formatter._format_lines([(token, u'dummy')]))
        # Use the document that was passed in rather than silently ignoring
        # the parameter (the only caller passes self._document).
        document.setHtml(html)
        return QtGui.QTextCursor(document).charFormat()

    def _get_format_from_style(self, token, style):
        """ Returns a QTextCharFormat for token by reading a Pygments style.
        """
        result = QtGui.QTextCharFormat()
        for key, value in style.style_for_token(token).items():
            if not value:
                # Unset/false attributes leave the format untouched.
                continue
            if key == 'color':
                result.setForeground(self._get_brush(value))
            elif key == 'bgcolor':
                result.setBackground(self._get_brush(value))
            elif key == 'bold':
                result.setFontWeight(QtGui.QFont.Bold)
            elif key == 'italic':
                result.setFontItalic(True)
            elif key == 'underline':
                result.setUnderlineStyle(
                    QtGui.QTextCharFormat.SingleUnderline)
            elif key == 'sans':
                result.setFontStyleHint(QtGui.QFont.SansSerif)
            elif key == 'roman':
                result.setFontStyleHint(QtGui.QFont.Times)
            elif key == 'mono':
                result.setFontStyleHint(QtGui.QFont.TypeWriter)
        return result

    def _get_brush(self, color):
        """ Returns a brush for the color.

        Brushes are cached per color string until the caches are cleared.
        """
        result = self._brushes.get(color)
        if result is None:
            qcolor = self._get_color(color)
            result = QtGui.QBrush(qcolor)
            self._brushes[color] = result
        return result

    def _get_color(self, color):
        """ Returns a QColor built from a Pygments color string.

        The first six characters of ``color`` are read as three two-digit
        hexadecimal components: red, green, blue.
        """
        qcolor = QtGui.QColor()
        qcolor.setRgb(int(color[:2], base=16),
                      int(color[2:4], base=16),
                      int(color[4:6], base=16))
        return qcolor