PyColorize.py
282 lines
| 8.3 KiB
| text/x-python
|
PythonLexer
/ IPython / PyColorize.py
fperez
|
r0 | # -*- coding: utf-8 -*- | ||
""" | ||||
Class and program to colorize python source code for ANSI terminals. | ||||
Based on an HTML code highlighter by Jurgen Hermann found at: | ||||
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298 | ||||
Modifications by Fernando Perez (fperez@colorado.edu). | ||||
Information on the original HTML highlighter follows: | ||||
MoinMoin - Python Source Parser | ||||
Title:olorize Python source using the built-in tokenizer | ||||
Submitter: Jurgen Hermann | ||||
Last Updated:2001/04/06 | ||||
Version no:1.2 | ||||
Description: | ||||
This code is part of MoinMoin (http://moin.sourceforge.net/) and converts | ||||
Python source code to HTML markup, rendering comments, keywords, | ||||
operators, numeric and string literals in different colors. | ||||
It shows how to use the built-in keyword, token and tokenize modules to | ||||
scan Python source code and re-emit it with no changes to its original | ||||
formatting (which is the hard part). | ||||
jdh2358
|
r595 | $Id: PyColorize.py 2225 2007-04-08 02:48:16Z jdh2358 $""" | ||
fperez
|
r0 | |||
__all__ = ['ANSICodeColors','Parser'] | ||||
_scheme_default = 'Linux' | ||||
# Imports | ||||
fperez
|
r52 | import cStringIO | ||
import keyword | ||||
import os | ||||
import string | ||||
import sys | ||||
import token | ||||
import tokenize | ||||
fperez
|
r0 | |||
from IPython.ColorANSI import * | ||||
############################################################################# | ||||
### Python Source Parser (does Hilighting) | ||||
############################################################################# | ||||
_KEYWORD = token.NT_OFFSET + 1 | ||||
_TEXT = token.NT_OFFSET + 2 | ||||
#**************************************************************************** | ||||
# Builtin color schemes | ||||
Colors = TermColors # just a shorthand | ||||
# Build a few color schemes | ||||
NoColor = ColorScheme( | ||||
'NoColor',{ | ||||
token.NUMBER : Colors.NoColor, | ||||
token.OP : Colors.NoColor, | ||||
token.STRING : Colors.NoColor, | ||||
tokenize.COMMENT : Colors.NoColor, | ||||
token.NAME : Colors.NoColor, | ||||
token.ERRORTOKEN : Colors.NoColor, | ||||
_KEYWORD : Colors.NoColor, | ||||
_TEXT : Colors.NoColor, | ||||
'normal' : Colors.NoColor # color off (usu. Colors.Normal) | ||||
} ) | ||||
LinuxColors = ColorScheme( | ||||
'Linux',{ | ||||
token.NUMBER : Colors.LightCyan, | ||||
token.OP : Colors.Yellow, | ||||
token.STRING : Colors.LightBlue, | ||||
tokenize.COMMENT : Colors.LightRed, | ||||
token.NAME : Colors.White, | ||||
token.ERRORTOKEN : Colors.Red, | ||||
_KEYWORD : Colors.LightGreen, | ||||
_TEXT : Colors.Yellow, | ||||
'normal' : Colors.Normal # color off (usu. Colors.Normal) | ||||
} ) | ||||
LightBGColors = ColorScheme( | ||||
'LightBG',{ | ||||
token.NUMBER : Colors.Cyan, | ||||
token.OP : Colors.Blue, | ||||
token.STRING : Colors.Blue, | ||||
tokenize.COMMENT : Colors.Red, | ||||
token.NAME : Colors.Black, | ||||
token.ERRORTOKEN : Colors.Red, | ||||
_KEYWORD : Colors.Green, | ||||
_TEXT : Colors.Blue, | ||||
'normal' : Colors.Normal # color off (usu. Colors.Normal) | ||||
} ) | ||||
# Build table of color schemes (needed by the parser) | ||||
ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors], | ||||
_scheme_default) | ||||
class Parser: | ||||
""" Format colored Python source. | ||||
""" | ||||
def __init__(self, color_table=None,out = sys.stdout): | ||||
""" Create a parser with a specified color table and output channel. | ||||
Call format() to process code. | ||||
""" | ||||
self.color_table = color_table and color_table or ANSICodeColors | ||||
self.out = out | ||||
def format(self, raw, out = None, scheme = ''): | ||||
fperez
|
r553 | return self.format2(raw, out, scheme)[0] | ||
def format2(self, raw, out = None, scheme = ''): | ||||
fperez
|
r0 | """ Parse and send the colored source. | ||
If out and scheme are not specified, the defaults (given to | ||||
constructor) are used. | ||||
out should be a file-type object. Optionally, out can be given as the | ||||
string 'str' and the parser will automatically return the output in a | ||||
string.""" | ||||
jdh2358
|
r595 | |||
fperez
|
r0 | string_output = 0 | ||
if out == 'str' or self.out == 'str': | ||||
out_old = self.out | ||||
self.out = cStringIO.StringIO() | ||||
string_output = 1 | ||||
elif out is not None: | ||||
self.out = out | ||||
fperez
|
r588 | |||
# Fast return of the unmodified input for NoColor scheme | ||||
if scheme == 'NoColor': | ||||
error = False | ||||
self.out.write(raw) | ||||
if string_output: | ||||
return raw,error | ||||
else: | ||||
return None,error | ||||
# local shorthands | ||||
fperez
|
r0 | colors = self.color_table[scheme].colors | ||
self.colors = colors # put in object so __call__ sees it | ||||
fperez
|
r588 | |||
# Remove trailing whitespace and normalize tabs | ||||
self.raw = raw.expandtabs().rstrip() | ||||
jdh2358
|
r595 | |||
fperez
|
r0 | # store line offsets in self.lines | ||
self.lines = [0, 0] | ||||
pos = 0 | ||||
jdh2358
|
r595 | raw_find = self.raw.find | ||
fperez
|
r588 | lines_append = self.lines.append | ||
fperez
|
r0 | while 1: | ||
fperez
|
r588 | pos = raw_find('\n', pos) + 1 | ||
fperez
|
r0 | if not pos: break | ||
fperez
|
r588 | lines_append(pos) | ||
lines_append(len(self.raw)) | ||||
fperez
|
r0 | |||
# parse the source and write it | ||||
self.pos = 0 | ||||
text = cStringIO.StringIO(self.raw) | ||||
fperez
|
r553 | |||
error = False | ||||
fperez
|
r0 | try: | ||
tokenize.tokenize(text.readline, self) | ||||
except tokenize.TokenError, ex: | ||||
msg = ex[0] | ||||
line = ex[1][0] | ||||
self.out.write("%s\n\n*** ERROR: %s%s%s\n" % | ||||
(colors[token.ERRORTOKEN], | ||||
msg, self.raw[self.lines[line]:], | ||||
colors.normal) | ||||
) | ||||
fperez
|
r553 | error = True | ||
fperez
|
r0 | self.out.write(colors.normal+'\n') | ||
if string_output: | ||||
output = self.out.getvalue() | ||||
self.out = out_old | ||||
fperez
|
r553 | return (output, error) | ||
return (None, error) | ||||
fperez
|
r0 | |||
def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line): | ||||
""" Token handler, with syntax highlighting.""" | ||||
fperez
|
r588 | # local shorthands | ||
fperez
|
r0 | colors = self.colors | ||
fperez
|
r588 | owrite = self.out.write | ||
fperez
|
r0 | |||
# line separator, so this works across platforms | ||||
linesep = os.linesep | ||||
# calculate new positions | ||||
oldpos = self.pos | ||||
newpos = self.lines[srow] + scol | ||||
self.pos = newpos + len(toktext) | ||||
# handle newlines | ||||
if toktype in [token.NEWLINE, tokenize.NL]: | ||||
fperez
|
r588 | owrite(linesep) | ||
fperez
|
r0 | return | ||
# send the original whitespace, if needed | ||||
if newpos > oldpos: | ||||
fperez
|
r588 | owrite(self.raw[oldpos:newpos]) | ||
fperez
|
r0 | |||
# skip indenting tokens | ||||
if toktype in [token.INDENT, token.DEDENT]: | ||||
self.pos = newpos | ||||
return | ||||
# map token type to a color group | ||||
if token.LPAR <= toktype and toktype <= token.OP: | ||||
toktype = token.OP | ||||
elif toktype == token.NAME and keyword.iskeyword(toktext): | ||||
toktype = _KEYWORD | ||||
color = colors.get(toktype, colors[_TEXT]) | ||||
#print '<%s>' % toktext, # dbg | ||||
# Triple quoted strings must be handled carefully so that backtracking | ||||
# in pagers works correctly. We need color terminators on _each_ line. | ||||
if linesep in toktext: | ||||
toktext = toktext.replace(linesep, '%s%s%s' % | ||||
(colors.normal,linesep,color)) | ||||
# send text | ||||
fperez
|
r588 | owrite('%s%s%s' % (color,toktext,colors.normal)) | ||
fperez
|
r0 | |||
def main(): | ||||
"""Colorize a python file using ANSI color escapes and print to stdout. | ||||
Usage: | ||||
%s [-s scheme] filename | ||||
Options: | ||||
-s scheme: give the color scheme to use. Currently only 'Linux' | ||||
(default) and 'LightBG' and 'NoColor' are implemented (give without | ||||
quotes). """ | ||||
def usage(): | ||||
print >> sys.stderr, main.__doc__ % sys.argv[0] | ||||
sys.exit(1) | ||||
# FIXME: rewrite this to at least use getopt | ||||
try: | ||||
if sys.argv[1] == '-s': | ||||
scheme_name = sys.argv[2] | ||||
del sys.argv[1:3] | ||||
else: | ||||
scheme_name = _scheme_default | ||||
except: | ||||
usage() | ||||
try: | ||||
fname = sys.argv[1] | ||||
except: | ||||
usage() | ||||
# write colorized version to stdout | ||||
parser = Parser() | ||||
try: | ||||
parser.format(file(fname).read(),scheme = scheme_name) | ||||
except IOError,msg: | ||||
# if user reads through a pager and quits, don't print traceback | ||||
if msg.args != (32,'Broken pipe'): | ||||
raise | ||||
if __name__ == "__main__": | ||||
main() | ||||