PyColorize.py
310 lines
| 9.5 KiB
| text/x-python
|
PythonLexer
fperez
|
# -*- coding: utf-8 -*-
"""
Class and program to colorize python source code for ANSI terminals.

Based on an HTML code highlighter by Jurgen Hermann found at:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298

Modifications by Fernando Perez (fperez@colorado.edu).

Information on the original HTML highlighter follows:

MoinMoin - Python Source Parser

Title: Colorize Python source using the built-in tokenizer

Submitter: Jurgen Hermann
Last Updated:2001/04/06

Version no:1.2

Description:
This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
Python source code to HTML markup, rendering comments, keywords,
operators, numeric and string literals in different colors.
It shows how to use the built-in keyword, token and tokenize modules to
scan Python source code and re-emit it with no changes to its original
formatting (which is the hard part).
"""
Matthias BUSSONNIER
|
r7817 | from __future__ import print_function | ||
fperez
|
r0 | |||
Jörgen Stenarson
|
r8299 | from __future__ import unicode_literals | ||
fperez
|
# Names exported by `from ... import *`.
__all__ = [
    'ANSICodeColors',
    'Parser',
]

# Name of the color scheme used as the default.
_scheme_default = 'Linux'
Matthias BUSSONNIER
|
r7817 | |||
fperez
|
r0 | # Imports | ||
MinRK
|
r4794 | import StringIO | ||
fperez
|
r52 | import keyword | ||
import os | ||||
import sys | ||||
import token | ||||
import tokenize | ||||
fperez
|
r0 | |||
Thomas Kluyver
|
# Bind a tokenizer entry point that accepts a readline callable returning
# *strings* (not bytes) and is called with that single argument.
try:
    generate_tokens = tokenize.generate_tokens
except AttributeError:
    # Python 3 without generate_tokens. Fall back on the undocumented
    # _tokenize because it expects strings, not bytes. See also Python
    # issue #9969. _tokenize takes an explicit encoding argument, so wrap it
    # to keep the one-argument generate_tokens(readline) calling convention.
    def generate_tokens(readline):
        """Tokenize the strings returned by successive readline() calls."""
        return tokenize._tokenize(readline, None)
Brian Granger
|
r2010 | from IPython.utils.coloransi import * | ||
fperez
|
r0 | |||
#############################################################################
### Python Source Parser (does Highlighting)
#############################################################################
# Extra token categories, numbered just past token.NT_OFFSET.
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

Colors = TermColors  # just a shorthand

# Build a few color schemes.  Each one maps a token category to a terminal
# color; the 'normal' entry is the "color off" code (usu. Colors.Normal).

# NoColor: every category, including 'normal', uses Colors.NoColor.
NoColor = ColorScheme(
    'NoColor',
    dict.fromkeys(
        (
            token.NUMBER,
            token.OP,
            token.STRING,
            tokenize.COMMENT,
            token.NAME,
            token.ERRORTOKEN,
            _KEYWORD,
            _TEXT,
            'normal',
        ),
        Colors.NoColor,
    ),
)

LinuxColors = ColorScheme(
    'Linux',
    {
        token.NUMBER: Colors.LightCyan,
        token.OP: Colors.Yellow,
        token.STRING: Colors.LightBlue,
        tokenize.COMMENT: Colors.LightRed,
        token.NAME: Colors.Normal,
        token.ERRORTOKEN: Colors.Red,

        _KEYWORD: Colors.LightGreen,
        _TEXT: Colors.Yellow,

        'normal': Colors.Normal,  # color off (usu. Colors.Normal)
    },
)

LightBGColors = ColorScheme(
    'LightBG',
    {
        token.NUMBER: Colors.Cyan,
        token.OP: Colors.Blue,
        token.STRING: Colors.Blue,
        tokenize.COMMENT: Colors.Red,
        token.NAME: Colors.Normal,
        token.ERRORTOKEN: Colors.Red,

        _KEYWORD: Colors.Green,
        _TEXT: Colors.Blue,

        'normal': Colors.Normal,  # color off (usu. Colors.Normal)
    },
)

# Build table of color schemes (needed by the parser)
ANSICodeColors = ColorSchemeTable(
    [NoColor, LinuxColors, LightBGColors],
    _scheme_default,
)
class Parser:
    """Format colored Python source.

    The instance is also the token handler: format2() passes every token
    produced by the tokenizer to __call__(), which writes the colored text
    to the current output channel (self.out).
    """

    def __init__(self, color_table=None, out = sys.stdout):
        """Create a parser with a specified color table and output channel.

        color_table: table of color schemes indexed by scheme name; when it
            is not given (or is empty) ANSICodeColors is used.
        out: object with a write() method, or the string 'str' to have the
            formatting methods return their output as a string.

        Call format() to process code.
        """
        self.color_table = color_table or ANSICodeColors
        self.out = out

    def format(self, raw, out = None, scheme = ''):
        """Colorize raw; return only the output part of format2()'s result."""
        return self.format2(raw, out, scheme)[0]

    def format2(self, raw, out = None, scheme = ''):
        """Parse and send the colored source.

        raw: the source text to colorize.
        out: a file-type object, or the string 'str' to get the output back
            as a string.  If it is not specified, the channel given to the
            constructor is used.  A file-type object passed here replaces
            the parser's channel for this and for later calls.
        scheme: name of the scheme to look up in the color table; 'NoColor'
            sends raw through unmodified.

        Returns a tuple (output, error).  output is the colored text when
        string output is in effect and None otherwise; error is True when
        the tokenizer raised tokenize.TokenError.
        """
        # XXX - I don't really like this state handling logic, but at this
        # point I don't want to make major changes, so adding the
        # isinstance() check is the simplest I can do to ensure correct
        # behavior.
        string_output = (out == 'str' or self.out == 'str' or
                         isinstance(self.out, StringIO.StringIO))
        out_old = self.out
        if string_output:
            # Collect everything in a temporary buffer.
            self.out = StringIO.StringIO()
        elif out is not None:
            self.out = out

        try:
            # Fast return of the unmodified input for NoColor scheme
            if scheme == 'NoColor':
                self.out.write(raw)
                return (raw if string_output else None), False

            error = self._colorize(raw, scheme)
            output = self.out.getvalue() if string_output else None
            return (output, error)
        finally:
            # Always put the real channel back, on every exit path, so that a
            # string-output call never leaves the temporary buffer installed.
            if string_output:
                self.out = out_old

    def _colorize(self, raw, scheme):
        """Tokenize raw and write it, colored, to self.out.

        Returns True if the tokenizer raised tokenize.TokenError (an error
        report is then written to self.out), False otherwise.
        """
        # local shorthands
        colors = self.color_table[scheme].colors
        self.colors = colors  # put in object so __call__ sees it

        # Remove trailing whitespace and normalize tabs
        self.raw = raw.expandtabs().rstrip()

        # Store line offsets in self.lines.  __call__ indexes this list with
        # the token's row number, so entry 1 is the offset of the first line
        # and entry 0 is only a placeholder.
        self.lines = [0, 0]
        pos = 0
        raw_find = self.raw.find
        lines_append = self.lines.append
        while True:
            pos = raw_find('\n', pos) + 1
            if not pos:
                break
            lines_append(pos)
        lines_append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        text = StringIO.StringIO(self.raw)

        error = False
        try:
            for atoken in generate_tokens(text.readline):
                self(*atoken)
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            line = ex.args[1][0]
            # Report the error followed by the source from the offending
            # line onwards.
            self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                           (colors[token.ERRORTOKEN],
                            msg, self.raw[self.lines[line]:],
                            colors.normal)
                           )
            error = True
        self.out.write(colors.normal+'\n')
        return error

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """ Token handler, with syntax highlighting.

        Receives the five fields of one token as produced by the tokenizer;
        only the type, the text and the start position are used.
        """
        (srow, scol) = start_pos
        colors = self.colors
        owrite = self.out.write

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # send the original whitespace, if needed
        if newpos > oldpos:
            owrite(self.raw[oldpos:newpos])

        # skip indenting tokens
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly. We need color terminators on _each_ line.
        # The text being tokenized is an in-memory string whose lines are
        # found by searching for '\n' (see _colorize), so split on '\n' and
        # not on os.linesep, which is '\r\n' on Windows and would not match.
        if '\n' in toktext:
            toktext = toktext.replace('\n', '%s%s%s' %
                                      (colors.normal, '\n', color))

        # send text
        owrite('%s%s%s' % (color, toktext, colors.normal))
ernie french
|
r6001 | |||
vivainio
|
def main(argv=None):
    """Run as a command-line script: colorize a python file or stdin using ANSI
    color escapes and print to stdout.

    Inputs:

      - argv(None): a list of strings like sys.argv[1:] giving the command-line
        arguments. If None, use sys.argv[1:].

    Exits with status 1 (after printing the error to stderr) if the named
    file cannot be opened.
    """
    import errno
    import optparse

    usage_msg = """%prog [options] [filename]

Colorize a python file or stdin using ANSI color escapes and print to stdout.
If no filename is given, or if filename is -, read standard input."""

    option_parser = optparse.OptionParser(usage=usage_msg)
    option_parser.add_option(
        '-s', '--scheme', metavar='NAME', dest='scheme_name', action='store',
        choices=['Linux', 'LightBG', 'NoColor'], default=_scheme_default,
        help="give the color scheme to use. Currently only 'Linux'"
             " (default) and 'LightBG' and 'NoColor' are implemented (give"
             " without quotes)")
    opts, args = option_parser.parse_args(argv)

    if len(args) > 1:
        option_parser.error("you must give at most one filename.")

    # no filename given; setup to read from stdin
    fname = args[0] if args else '-'

    if fname == '-':
        stream = sys.stdin
    else:
        try:
            stream = open(fname)
        except IOError as msg:
            print(msg, file=sys.stderr)
            sys.exit(1)

    try:
        # write colorized version to stdout
        Parser().format(stream.read(), scheme=opts.scheme_name)
    except IOError as err:
        # if user reads through a pager and quits, don't print traceback.
        # Compare the error number rather than the message text, which is
        # not the same on every platform.
        if err.errno != errno.EPIPE:
            raise
    finally:
        if stream is not sys.stdin:
            stream.close()  # in case a non-handled exception happened above
fperez
|
r0 | |||
# Script entry point: colorize according to the command-line arguments.
if "__main__" == __name__:
    main()