# -*- coding: utf-8 -*-
"""
Class and program to colorize python source code for ANSI terminals.

Based on an HTML code highlighter by Jurgen Hermann found at:
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298

Modifications by Fernando Perez (fperez@colorado.edu).

Information on the original HTML highlighter follows:

MoinMoin - Python Source Parser

Title: Colorize Python source using the built-in tokenizer

Submitter: Jurgen Hermann
Last Updated:2001/04/06

Version no:1.2

Description:

This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
Python source code to HTML markup, rendering comments, keywords,
operators, numeric and string literals in different colors.

It shows how to use the built-in keyword, token and tokenize modules to
scan Python source code and re-emit it with no changes to its original
formatting (which is the hard part).
"""
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals

__all__ = ['ANSICodeColors','Parser']

_scheme_default = 'Linux'


# Imports
import keyword
import os
import sys
import token
import tokenize

generate_tokens = tokenize.generate_tokens

from IPython.utils.coloransi import TermColors, InputTermColors ,ColorScheme, ColorSchemeTable
from IPython.utils.py3compat import PY3

from .colorable import Colorable

if PY3:
    from io import StringIO
else:
    from StringIO import StringIO

#############################################################################
### Python Source Parser (does Highlighting)
#############################################################################

# Pseudo token types, placed above the range used by the `token` module so
# they can share a color dictionary with the real token types.
_KEYWORD = token.NT_OFFSET + 1
_TEXT    = token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

Colors = TermColors  # just a shorthand

# Build a few color schemes
NoColor = ColorScheme(
    'NoColor',{
    'header'         : Colors.NoColor,
    token.NUMBER     : Colors.NoColor,
    token.OP         : Colors.NoColor,
    token.STRING     : Colors.NoColor,
    tokenize.COMMENT : Colors.NoColor,
    token.NAME       : Colors.NoColor,
    token.ERRORTOKEN : Colors.NoColor,

    _KEYWORD         : Colors.NoColor,
    _TEXT            : Colors.NoColor,

    'in_prompt'      : InputTermColors.NoColor,  # Input prompt
    'in_number'      : InputTermColors.NoColor,  # Input prompt number
    'in_prompt2'     : InputTermColors.NoColor,  # Continuation prompt
    'in_normal'      : InputTermColors.NoColor,  # color off (usu. Colors.Normal)

    'out_prompt'     : Colors.NoColor,  # Output prompt
    'out_number'     : Colors.NoColor,  # Output prompt number

    'normal'         : Colors.NoColor   # color off (usu. Colors.Normal)
    }  )

LinuxColors = ColorScheme(
    'Linux',{
    'header'         : Colors.LightRed,
    token.NUMBER     : Colors.LightCyan,
    token.OP         : Colors.Yellow,
    token.STRING     : Colors.LightBlue,
    tokenize.COMMENT : Colors.LightRed,
    token.NAME       : Colors.Normal,
    token.ERRORTOKEN : Colors.Red,

    _KEYWORD         : Colors.LightGreen,
    _TEXT            : Colors.Yellow,

    'in_prompt'      : InputTermColors.Green,
    'in_number'      : InputTermColors.LightGreen,
    'in_prompt2'     : InputTermColors.Green,
    'in_normal'      : InputTermColors.Normal,  # color off (usu. Colors.Normal)

    'out_prompt'     : Colors.Red,
    'out_number'     : Colors.LightRed,

    'normal'         : Colors.Normal  # color off (usu. Colors.Normal)
    } )

NeutralColors = ColorScheme(
    'Neutral',{
    'header'         : Colors.Red,
    token.NUMBER     : Colors.Cyan,
    token.OP         : Colors.Blue,
    token.STRING     : Colors.Blue,
    tokenize.COMMENT : Colors.Red,
    token.NAME       : Colors.Normal,
    token.ERRORTOKEN : Colors.Red,

    _KEYWORD         : Colors.Green,
    _TEXT            : Colors.Blue,

    'in_prompt'      : InputTermColors.Blue,
    'in_number'      : InputTermColors.LightBlue,
    'in_prompt2'     : InputTermColors.Blue,
    'in_normal'      : InputTermColors.Normal,  # color off (usu. Colors.Normal)

    'out_prompt'     : Colors.Red,
    'out_number'     : Colors.LightRed,

    'normal'         : Colors.Normal  # color off (usu. Colors.Normal)
    }  )

# Hack: the 'neutral' colours are not very visible on a dark background on
# Windows. Since Windows command prompts have a dark background by default, and
# relatively few users are likely to alter that, we will use the 'Linux' colours,
# designed for a dark background, as the default on Windows. Changing it here
# avoids affecting the prompt colours rendered by prompt_toolkit, where the
# neutral defaults do work OK.

if os.name == 'nt':
    NeutralColors = LinuxColors.copy(name='Neutral')

LightBGColors = ColorScheme(
    'LightBG',{
    'header'         : Colors.Red,
    token.NUMBER     : Colors.Cyan,
    token.OP         : Colors.Blue,
    token.STRING     : Colors.Blue,
    tokenize.COMMENT : Colors.Red,
    token.NAME       : Colors.Normal,
    token.ERRORTOKEN : Colors.Red,

    _KEYWORD         : Colors.Green,
    _TEXT            : Colors.Blue,

    'in_prompt'      : InputTermColors.Blue,
    'in_number'      : InputTermColors.LightBlue,
    'in_prompt2'     : InputTermColors.Blue,
    'in_normal'      : InputTermColors.Normal,  # color off (usu. Colors.Normal)

    'out_prompt'     : Colors.Red,
    'out_number'     : Colors.LightRed,

    'normal'         : Colors.Normal  # color off (usu. Colors.Normal)
    }  )

# Build table of color schemes (needed by the parser)
ANSICodeColors = ColorSchemeTable([NoColor, LinuxColors, LightBGColors, NeutralColors],
                                  _scheme_default)

# Sentinel used to detect whether the deprecated `scheme` argument of
# Parser.format was passed at all (None cannot be used for that).
Undefined = object()


class Parser(Colorable):
    """ Format colored Python source.
    """

    def __init__(self, color_table=None, out = sys.stdout, parent=None, style=None):
        """ Create a parser with a specified color table and output channel.

        Call format() to process code.

        Parameters
        ----------
        color_table : optional
            Table of color schemes, indexed by style name. A falsy value
            selects the module-level ``ANSICodeColors`` table.
        out : file-like object or the string 'str'
            Where colored output is written. With 'str' (or a StringIO
            instance) format2() returns the output as a string instead.
        parent : optional
            Forwarded unchanged to the ``Colorable`` constructor.
        style : optional
            Name of the scheme to use; a falsy value selects
            ``self.default_style``.
        """

        super(Parser, self).__init__(parent=parent)

        self.color_table = color_table if color_table else ANSICodeColors
        self.out = out
        self.style = style if style else self.default_style

    def format(self, raw, out=None, scheme=Undefined):
        """ Colorize `raw` and return only the output part of format2().

        The `scheme` argument is deprecated and ignored; passing it only
        triggers a warning. The return value is the colored string when
        string output is in effect and None otherwise (see format2()).
        """
        import warnings
        if scheme is not Undefined:
            warnings.warn('The `scheme` argument of IPython.utils.PyColorize:Parser.format is deprecated since IPython 6.0. '
                          'It will have no effect. Set the parser `style` directly.',
                          stacklevel=2)
        return self.format2(raw, out)[0]

    def format2(self, raw, out = None):
        """ Parse and send the colored source.

        If out is not specified, the default (given to the constructor) is
        used.

        out should be a file-type object. Optionally, out can be given as the
        string 'str' and the parser will automatically return the output in a
        string.

        Returns
        -------
        (output, error) : tuple
            `output` is the colored text when string output is in effect and
            None when the text was written to a file-like object. `error` is
            True when the tokenizer raised ``tokenize.TokenError``.
        """

        string_output = False
        out_old = None
        if out == 'str' or self.out == 'str' or \
           isinstance(self.out, StringIO):
            # XXX - I don't really like this state handling logic, but at this
            # point I don't want to make major changes, so adding the
            # isinstance() check is the simplest I can do to ensure correct
            # behavior.
            out_old = self.out
            self.out = StringIO()
            string_output = True
        elif out is not None:
            self.out = out

        try:
            # Fast return of the unmodified input for NoColor scheme
            if self.style == 'NoColor':
                self.out.write(raw)
                return (raw if string_output else None), False

            error = self._write_colored(raw)
            output = self.out.getvalue() if string_output else None
            return (output, error)
        finally:
            # Always put the caller's output channel back. Leaving the
            # temporary StringIO in self.out would make every later call
            # look like a string-output request and stop writing to the
            # real stream.
            if string_output:
                self.out = out_old

    def _write_colored(self, raw):
        """ Tokenize `raw` and write the colored text to self.out.

        Returns True if tokenizing stopped on a ``tokenize.TokenError``
        (an error report is then written to the output), False otherwise.
        """
        # local shorthands
        colors = self.color_table[self.style].colors
        self.colors = colors  # put in object so __call__ sees it

        # Remove trailing whitespace and normalize tabs
        self.raw = raw.expandtabs().rstrip()

        # store line offsets in self.lines; token rows are 1-based, so
        # index 0 is a placeholder and index 1 is the first line's offset
        self.lines = [0, 0]
        pos = 0
        raw_find = self.raw.find
        lines_append = self.lines.append
        while True:
            # find() returns -1 when no newline is left, making pos == 0
            pos = raw_find('\n', pos) + 1
            if not pos:
                break
            lines_append(pos)
        lines_append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        text = StringIO(self.raw)

        error = False
        try:
            for atoken in generate_tokens(text.readline):
                self(*atoken)
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            line = ex.args[1][0]
            # Report the error followed by the source text from the
            # offending line onwards.
            self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                           (colors[token.ERRORTOKEN],
                            msg, self.raw[self.lines[line]:],
                            colors.normal)
                           )
            error = True
        self.out.write(colors.normal + '\n')
        return error

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """ Token handler, with syntax highlighting.

        Called once per token with the 5-tuple produced by
        ``tokenize.generate_tokens``. Only the token type, its text and its
        start position are used; `end_pos` and `line` are accepted so the
        tuple can be splatted directly into the call.
        """
        srow, scol = start_pos
        colors = self.colors
        owrite = self.out.write

        # line separator, so this works across platforms
        # NOTE(review): the text is tokenized from an in-memory string, so on
        # platforms where os.linesep is not '\n' this test may never match
        # '\n'-only input -- confirm before changing.
        linesep = os.linesep

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # send the original whitespace, if needed
        if newpos > oldpos:
            owrite(self.raw[oldpos:newpos])

        # skip indenting tokens
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly. We need color terminators on _each_ line.
        if linesep in toktext:
            toktext = toktext.replace(linesep, '%s%s%s' %
                                      (colors.normal, linesep, color))

        # send text
        owrite('%s%s%s' % (color, toktext, colors.normal))