PyColorize.py
331 lines
| 10.6 KiB
| text/x-python
|
PythonLexer
fperez
|
r0 | # -*- coding: utf-8 -*- | ||
""" | ||||
Fernando Perez
|
r1853 | Class and program to colorize python source code for ANSI terminals. | ||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Based on an HTML code highlighter by Jurgen Hermann found at: | ||
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298 | ||||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Modifications by Fernando Perez (fperez@colorado.edu). | ||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Information on the original HTML highlighter follows: | ||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | MoinMoin - Python Source Parser | ||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Title: Colorize Python source using the built-in tokenizer | ||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Submitter: Jurgen Hermann | ||
Last Updated:2001/04/06 | ||||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Version no:1.2 | ||
fperez
|
r0 | |||
Fernando Perez
|
r1853 | Description: | ||
This code is part of MoinMoin (http://moin.sourceforge.net/) and converts | ||||
Python source code to HTML markup, rendering comments, keywords, | ||||
operators, numeric and string literals in different colors. | ||||
It shows how to use the built-in keyword, token and tokenize modules to | ||||
scan Python source code and re-emit it with no changes to its original | ||||
formatting (which is the hard part). | ||||
""" | ||||
Jörgen Stenarson
|
r8299 | |||
Matthias Bussonnier
|
# Names exported by ``from <this module> import *``.
__all__ = ['ANSICodeColors', 'Parser']

# Name of the scheme handed to ColorSchemeTable as the default when the
# ANSICodeColors table is built further down in this module.
_scheme_default = 'Linux'
Matthias BUSSONNIER
|
r7817 | |||
fperez
|
r0 | # Imports | ||
fperez
|
r52 | import keyword | ||
import os | ||||
import sys | ||||
import token | ||||
import tokenize | ||||
fperez
|
r0 | |||
Matthias Bussonnier
|
r22943 | generate_tokens = tokenize.generate_tokens | ||
Thomas Kluyver
|
r4758 | |||
Matthias Bussonnier
|
r24360 | from IPython.utils.coloransi import TermColors, InputTermColors,ColorScheme, ColorSchemeTable | ||
Matthias Bussonnier
|
r22109 | from .colorable import Colorable | ||
Srinivas Reddy Thatiparthy
|
r23110 | from io import StringIO | ||
fperez
|
r0 | |||
############################################################################# | ||||
Srinivas Reddy Thatiparthy
|
r23033 | ### Python Source Parser (does Highlighting) | ||
fperez
|
r0 | ############################################################################# | ||
# Extra color-table keys, numbered from token.NT_OFFSET.
# _KEYWORD is substituted by Parser._inner_call_ for NAME tokens that are
# Python keywords; _TEXT is the fallback key used when a token type has no
# entry of its own in the active scheme.
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

Colors = TermColors # just a shorthand
# Build a few color schemes | ||||
# Scheme in which every entry maps to the "NoColor" code of its color class.
NoColor = ColorScheme(
    'NoColor',
    {
        'header': Colors.NoColor,

        # Token types emitted by the tokenizer (plus the two local extras).
        token.NUMBER: Colors.NoColor,
        token.OP: Colors.NoColor,
        token.STRING: Colors.NoColor,
        tokenize.COMMENT: Colors.NoColor,
        token.NAME: Colors.NoColor,
        token.ERRORTOKEN: Colors.NoColor,
        _KEYWORD: Colors.NoColor,
        _TEXT: Colors.NoColor,

        # Input prompt entries.
        'in_prompt': InputTermColors.NoColor,   # Input prompt
        'in_number': InputTermColors.NoColor,   # Input prompt number
        'in_prompt2': InputTermColors.NoColor,  # Continuation prompt
        'in_normal': InputTermColors.NoColor,   # color off (usu. Colors.Normal)

        # Output prompt entries.
        'out_prompt': Colors.NoColor,  # Output prompt
        'out_number': Colors.NoColor,  # Output prompt number

        'normal': Colors.NoColor,  # color off (usu. Colors.Normal)
    })
# 'Linux' scheme; per the Windows note later in this module these colours are
# designed for a dark background.
LinuxColors = ColorScheme(
    'Linux',
    {
        'header': Colors.LightRed,

        # Token types emitted by the tokenizer (plus the two local extras).
        token.NUMBER: Colors.LightCyan,
        token.OP: Colors.Yellow,
        token.STRING: Colors.LightBlue,
        tokenize.COMMENT: Colors.LightRed,
        token.NAME: Colors.Normal,
        token.ERRORTOKEN: Colors.Red,
        _KEYWORD: Colors.LightGreen,
        _TEXT: Colors.Yellow,

        # Input prompt entries.
        'in_prompt': InputTermColors.Green,
        'in_number': InputTermColors.LightGreen,
        'in_prompt2': InputTermColors.Green,
        'in_normal': InputTermColors.Normal,  # color off (usu. Colors.Normal)

        # Output prompt entries.
        'out_prompt': Colors.Red,
        'out_number': Colors.LightRed,

        'normal': Colors.Normal,  # color off (usu. Colors.Normal)
    })
Matthias Bussonnier
|
# 'Neutral' scheme (replaced by a copy of the 'Linux' scheme on Windows, see
# the platform check that follows this definition).
NeutralColors = ColorScheme(
    'Neutral',
    {
        'header': Colors.Red,

        # Token types emitted by the tokenizer (plus the two local extras).
        token.NUMBER: Colors.Cyan,
        token.OP: Colors.Blue,
        token.STRING: Colors.Blue,
        tokenize.COMMENT: Colors.Red,
        token.NAME: Colors.Normal,
        token.ERRORTOKEN: Colors.Red,
        _KEYWORD: Colors.Green,
        _TEXT: Colors.Blue,

        # Input prompt entries.
        'in_prompt': InputTermColors.Blue,
        'in_number': InputTermColors.LightBlue,
        'in_prompt2': InputTermColors.Blue,
        'in_normal': InputTermColors.Normal,  # color off (usu. Colors.Normal)

        # Output prompt entries.
        'out_prompt': Colors.Red,
        'out_number': Colors.LightRed,

        'normal': Colors.Normal,  # color off (usu. Colors.Normal)
    })
Thomas Kluyver
|
# Hack: the 'neutral' colours are not very visible on a dark background on
# Windows. Since Windows command prompts have a dark background by default, and
# relatively few users are likely to alter that, we will use the 'Linux' colours,
# designed for a dark background, as the default on Windows. Changing it here
# avoids affecting the prompt colours rendered by prompt_toolkit, where the
# neutral defaults do work OK.
if os.name == 'nt':
    # Rebind the module-level name to a copy of LinuxColors created with
    # name='Neutral', so the table built below gets this copy under 'Neutral'.
    NeutralColors = LinuxColors.copy(name='Neutral')
Matthias Bussonnier
|
r22609 | |||
fperez
|
# 'LightBG' scheme.
LightBGColors = ColorScheme(
    'LightBG',
    {
        'header': Colors.Red,

        # Token types emitted by the tokenizer (plus the two local extras).
        token.NUMBER: Colors.Cyan,
        token.OP: Colors.Blue,
        token.STRING: Colors.Blue,
        tokenize.COMMENT: Colors.Red,
        token.NAME: Colors.Normal,
        token.ERRORTOKEN: Colors.Red,
        _KEYWORD: Colors.Green,
        _TEXT: Colors.Blue,

        # Input prompt entries.
        'in_prompt': InputTermColors.Blue,
        'in_number': InputTermColors.LightBlue,
        'in_prompt2': InputTermColors.Blue,
        'in_normal': InputTermColors.Normal,  # color off (usu. Colors.Normal)

        # Output prompt entries.
        'out_prompt': Colors.Red,
        'out_number': Colors.LightRed,

        'normal': Colors.Normal,  # color off (usu. Colors.Normal)
    })
# Build table of color schemes (needed by the parser) | ||||
Matthias Bussonnier
|
# Table holding the four schemes defined above, with _scheme_default passed as
# the second argument. Parser falls back to this table when it is constructed
# without a color_table.
ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors, NeutralColors],
                                  _scheme_default)
Matthias Bussonnier
|
# Sentinel default for Parser.format's deprecated `scheme` argument; an
# identity check against it tells "not passed" apart from any real value.
Undefined = object()
Matthias Bussonnier
|
class Parser(Colorable):
    """ Format colored Python source.

    The source is tokenized with the standard ``tokenize`` module and written
    back out with a color code in front of each token and the scheme's
    ``normal`` code after it. Color codes are looked up in ``color_table``
    under the scheme named by ``style``.
    """

    def __init__(self, color_table=None, out = sys.stdout, parent=None, style=None):
        """ Create a parser with a specified color table and output channel.

        Call format() to process code.

        Parameters
        ----------
        color_table
            Table of color schemes, indexed by scheme name. When falsy, the
            module-level ``ANSICodeColors`` table is used.
        out
            Default output channel. ``format2`` treats the string ``'str'``
            or a ``StringIO`` instance here as a request to return the
            output as a string.
        parent
            Forwarded unchanged to the ``Colorable`` constructor.
        style
            Name of the scheme to use. When falsy, ``self.default_style`` is
            used (presumably provided by ``Colorable`` -- confirm there).
        """

        super(Parser, self).__init__(parent=parent)

        self.color_table = color_table if color_table else ANSICodeColors
        self.out = out
        # Per-run state, filled in by format2().
        self.pos = None     # offset in self.raw just past the last token
        self.lines = None   # offsets in self.raw where each source row starts
        self.raw = None     # normalized source text being formatted
        if not style:
            self.style = self.default_style
        else:
            self.style = style

    def format(self, raw, out=None, scheme=Undefined):
        """ Format ``raw`` and return only the output part of format2().

        ``scheme`` is deprecated and ignored; passing it only triggers a
        warning. The return value is the first element of the tuple returned
        by ``format2(raw, out)``.
        """
        import warnings
        if scheme is not Undefined:
            warnings.warn('The `scheme` argument of IPython.utils.PyColorize:Parser.format is deprecated since IPython 6.0. '
                          'It will have no effect. Set the parser `style` directly.',
                          stacklevel=2)
        return self.format2(raw, out)[0]

    def format2(self, raw, out = None):
        """ Parse and send the colored source.

        Parameters
        ----------
        raw : str
            Python source text to colorize.
        out
            A file-type object to write to, or the string 'str' to have the
            output returned as a string. A file-type object passed here
            replaces ``self.out`` and stays installed after the call.

        Returns
        -------
        (output, error)
            ``output`` is the resulting text in string mode and None
            otherwise. ``error`` is True when tokenizing raised
            ``tokenize.TokenError``.

        Raises
        ------
        ValueError
            If ``out`` is None and ``self.out`` is neither 'str' nor a
            ``StringIO`` instance.

        Notes
        -----
        String mode is selected when ``out`` or ``self.out`` is 'str', or
        when ``self.out`` is a ``StringIO``. In that mode the text is
        collected in a temporary buffer and ``self.out`` is put back before
        returning, whichever way the method exits.

        With the 'NoColor' style ``raw`` is written (and, in string mode,
        returned) unmodified.
        """

        string_output = (out == 'str' or self.out == 'str' or
                         isinstance(self.out, StringIO))
        if string_output:
            # XXX - I don't really like this state handling logic, but at this
            # point I don't want to make major changes, so adding the
            # isinstance() check is the simplest I can do to ensure correct
            # behavior.
            out_old = self.out
            self.out = StringIO()
        elif out is not None:
            self.out = out
        else:
            raise ValueError('`out` or `self.out` should be file-like or the value `"str"`')

        try:
            # Fast return of the unmodified input for NoColor scheme
            if self.style == 'NoColor':
                self.out.write(raw)
                return (raw if string_output else None), False

            # local shorthands
            colors = self.color_table[self.style].colors
            self.colors = colors # put in object so __call__ sees it

            # Remove trailing whitespace and normalize tabs
            self.raw = raw.expandtabs().rstrip()

            # store line offsets in self.lines; tokenize rows are 1-based, so
            # index 0 is a placeholder and index 1 is the start of row 1
            self.lines = [0, 0]
            pos = self.raw.find('\n') + 1
            while pos:
                self.lines.append(pos)
                pos = self.raw.find('\n', pos) + 1
            self.lines.append(len(self.raw))

            # parse the source and write it
            self.pos = 0
            text = StringIO(self.raw)

            error = False
            try:
                for atoken in generate_tokens(text.readline):
                    self(*atoken)
            except tokenize.TokenError as ex:
                # ex.args is (message, (row, col)); echo the rest of the
                # source from the reported row in the error color.
                msg = ex.args[0]
                line = ex.args[1][0]
                self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                               (colors[token.ERRORTOKEN],
                                msg, self.raw[self.lines[line]:],
                                colors.normal)
                               )
                error = True
            self.out.write(colors.normal+'\n')
            output = self.out.getvalue() if string_output else None
            return (output, error)
        finally:
            # Always drop the temporary buffer, including on the NoColor fast
            # path and when an unexpected exception propagates.
            if string_output:
                self.out = out_old

    def _inner_call_(self, toktype, toktext, start_pos):
        """like call but return the text instead of writing it to self.out

        ``start_pos`` is the (row, column) pair supplied by tokenize. The
        returned string is any source text lying between the previous token
        and this one, followed by the colored token (nothing more for
        INDENT/DEDENT tokens). ``self.pos`` is advanced as a side effect.
        """
        srow, scol = start_pos
        colors = self.colors

        # line separator, so this works across platforms
        # NOTE(review): token text produced from the in-memory source normally
        # contains '\n'; on platforms where os.linesep differs, the per-line
        # terminator logic below may never trigger -- confirm intent.
        linesep = os.linesep

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # send the original whitespace, if needed
        leading = self.raw[oldpos:newpos] if newpos > oldpos else ''

        # skip indenting tokens
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return leading

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly. We need color terminators on _each_ line.
        if linesep in toktext:
            toktext = toktext.replace(linesep, '%s%s%s' %
                                      (colors.normal,linesep,color))

        # send text
        return '%s%s%s%s' % (leading, color, toktext, colors.normal)

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """ Token handler, with syntax highlighting.

        Takes the five fields of a tokenize token; ``end_pos`` and ``line``
        are accepted but not used.
        """
        self.out.write(
            self._inner_call_(toktype, toktext, start_pos))