##// END OF EJS Templates
- Fix state handling bug in format(). Closes #146.
jdh2358 -
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -1,282 +1,287 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Class and program to colorize python source code for ANSI terminals.
4 4
5 5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7 7
8 8 Modifications by Fernando Perez (fperez@colorado.edu).
9 9
10 10 Information on the original HTML highlighter follows:
11 11
12 12 MoinMoin - Python Source Parser
13 13
14 14 Title: Colorize Python source using the built-in tokenizer
15 15
16 16 Submitter: Jurgen Hermann
17 17 Last Updated:2001/04/06
18 18
19 19 Version no:1.2
20 20
21 21 Description:
22 22
23 23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 24 Python source code to HTML markup, rendering comments, keywords,
25 25 operators, numeric and string literals in different colors.
26 26
27 27 It shows how to use the built-in keyword, token and tokenize modules to
28 28 scan Python source code and re-emit it with no changes to its original
29 29 formatting (which is the hard part).
30 30
31 $Id: PyColorize.py 2225 2007-04-08 02:48:16Z jdh2358 $"""
31 $Id: PyColorize.py 2274 2007-04-26 14:41:43Z jdh2358 $"""
32 32
33 33 __all__ = ['ANSICodeColors','Parser']
34 34
35 35 _scheme_default = 'Linux'
36 36
37 37 # Imports
38 38 import cStringIO
39 39 import keyword
40 40 import os
41 41 import string
42 42 import sys
43 43 import token
44 44 import tokenize
45 45
46 46 from IPython.ColorANSI import *
47 47
48 48 #############################################################################
49 49 ### Python Source Parser (does Highlighting)
50 50 #############################################################################
51 51
52 52 _KEYWORD = token.NT_OFFSET + 1
53 53 _TEXT = token.NT_OFFSET + 2
54 54
55 55 #****************************************************************************
56 56 # Builtin color schemes
57 57
58 58 Colors = TermColors # just a shorthand
59 59
60 60 # Build a few color schemes
61 61 NoColor = ColorScheme(
62 62 'NoColor',{
63 63 token.NUMBER : Colors.NoColor,
64 64 token.OP : Colors.NoColor,
65 65 token.STRING : Colors.NoColor,
66 66 tokenize.COMMENT : Colors.NoColor,
67 67 token.NAME : Colors.NoColor,
68 68 token.ERRORTOKEN : Colors.NoColor,
69 69
70 70 _KEYWORD : Colors.NoColor,
71 71 _TEXT : Colors.NoColor,
72 72
73 73 'normal' : Colors.NoColor # color off (usu. Colors.Normal)
74 74 } )
75 75
76 76 LinuxColors = ColorScheme(
77 77 'Linux',{
78 78 token.NUMBER : Colors.LightCyan,
79 79 token.OP : Colors.Yellow,
80 80 token.STRING : Colors.LightBlue,
81 81 tokenize.COMMENT : Colors.LightRed,
82 82 token.NAME : Colors.White,
83 83 token.ERRORTOKEN : Colors.Red,
84 84
85 85 _KEYWORD : Colors.LightGreen,
86 86 _TEXT : Colors.Yellow,
87 87
88 88 'normal' : Colors.Normal # color off (usu. Colors.Normal)
89 89 } )
90 90
91 91 LightBGColors = ColorScheme(
92 92 'LightBG',{
93 93 token.NUMBER : Colors.Cyan,
94 94 token.OP : Colors.Blue,
95 95 token.STRING : Colors.Blue,
96 96 tokenize.COMMENT : Colors.Red,
97 97 token.NAME : Colors.Black,
98 98 token.ERRORTOKEN : Colors.Red,
99 99
100 100 _KEYWORD : Colors.Green,
101 101 _TEXT : Colors.Blue,
102 102
103 103 'normal' : Colors.Normal # color off (usu. Colors.Normal)
104 104 } )
105 105
106 106 # Build table of color schemes (needed by the parser)
107 107 ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors],
108 108 _scheme_default)
109 109
110 110 class Parser:
111 111 """ Format colored Python source.
112 112 """
113 113
114 114 def __init__(self, color_table=None,out = sys.stdout):
115 115 """ Create a parser with a specified color table and output channel.
116 116
117 117 Call format() to process code.
118 118 """
119 119 self.color_table = color_table and color_table or ANSICodeColors
120 120 self.out = out
121 121
122 122 def format(self, raw, out = None, scheme = ''):
123 123 return self.format2(raw, out, scheme)[0]
124 124
125 125 def format2(self, raw, out = None, scheme = ''):
126 126 """ Parse and send the colored source.
127 127
128 128 If out and scheme are not specified, the defaults (given to
129 129 constructor) are used.
130 130
131 131 out should be a file-type object. Optionally, out can be given as the
132 132 string 'str' and the parser will automatically return the output in a
133 133 string."""
134 134
135 135 string_output = 0
136 if out == 'str' or self.out == 'str':
136 if out == 'str' or self.out == 'str' or \
137 isinstance(self.out,cStringIO.OutputType):
138 # XXX - I don't really like this state handling logic, but at this
139 # point I don't want to make major changes, so adding the
140 # isinstance() check is the simplest I can do to ensure correct
141 # behavior.
137 142 out_old = self.out
138 143 self.out = cStringIO.StringIO()
139 144 string_output = 1
140 145 elif out is not None:
141 146 self.out = out
142 147
143 148 # Fast return of the unmodified input for NoColor scheme
144 149 if scheme == 'NoColor':
145 150 error = False
146 151 self.out.write(raw)
147 152 if string_output:
148 153 return raw,error
149 154 else:
150 155 return None,error
151 156
152 157 # local shorthands
153 158 colors = self.color_table[scheme].colors
154 159 self.colors = colors # put in object so __call__ sees it
155 160
156 161 # Remove trailing whitespace and normalize tabs
157 162 self.raw = raw.expandtabs().rstrip()
158 163
159 164 # store line offsets in self.lines
160 165 self.lines = [0, 0]
161 166 pos = 0
162 167 raw_find = self.raw.find
163 168 lines_append = self.lines.append
164 169 while 1:
165 170 pos = raw_find('\n', pos) + 1
166 171 if not pos: break
167 172 lines_append(pos)
168 173 lines_append(len(self.raw))
169 174
170 175 # parse the source and write it
171 176 self.pos = 0
172 177 text = cStringIO.StringIO(self.raw)
173 178
174 179 error = False
175 180 try:
176 181 tokenize.tokenize(text.readline, self)
177 182 except tokenize.TokenError, ex:
178 183 msg = ex[0]
179 184 line = ex[1][0]
180 185 self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
181 186 (colors[token.ERRORTOKEN],
182 187 msg, self.raw[self.lines[line]:],
183 188 colors.normal)
184 189 )
185 190 error = True
186 191 self.out.write(colors.normal+'\n')
187 192 if string_output:
188 193 output = self.out.getvalue()
189 194 self.out = out_old
190 195 return (output, error)
191 196 return (None, error)
192 197
193 198 def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
194 199 """ Token handler, with syntax highlighting."""
195 200
196 201 # local shorthands
197 202 colors = self.colors
198 203 owrite = self.out.write
199 204
200 205 # line separator, so this works across platforms
201 206 linesep = os.linesep
202 207
203 208 # calculate new positions
204 209 oldpos = self.pos
205 210 newpos = self.lines[srow] + scol
206 211 self.pos = newpos + len(toktext)
207 212
208 213 # handle newlines
209 214 if toktype in [token.NEWLINE, tokenize.NL]:
210 215 owrite(linesep)
211 216 return
212 217
213 218 # send the original whitespace, if needed
214 219 if newpos > oldpos:
215 220 owrite(self.raw[oldpos:newpos])
216 221
217 222 # skip indenting tokens
218 223 if toktype in [token.INDENT, token.DEDENT]:
219 224 self.pos = newpos
220 225 return
221 226
222 227 # map token type to a color group
223 228 if token.LPAR <= toktype and toktype <= token.OP:
224 229 toktype = token.OP
225 230 elif toktype == token.NAME and keyword.iskeyword(toktext):
226 231 toktype = _KEYWORD
227 232 color = colors.get(toktype, colors[_TEXT])
228 233
229 234 #print '<%s>' % toktext, # dbg
230 235
231 236 # Triple quoted strings must be handled carefully so that backtracking
232 237 # in pagers works correctly. We need color terminators on _each_ line.
233 238 if linesep in toktext:
234 239 toktext = toktext.replace(linesep, '%s%s%s' %
235 240 (colors.normal,linesep,color))
236 241
237 242 # send text
238 243 owrite('%s%s%s' % (color,toktext,colors.normal))
239 244
240 245 def main():
241 246 """Colorize a python file using ANSI color escapes and print to stdout.
242 247
243 248 Usage:
244 249 %s [-s scheme] filename
245 250
246 251 Options:
247 252
248 253 -s scheme: give the color scheme to use. Currently only 'Linux'
249 254 (default) and 'LightBG' and 'NoColor' are implemented (give without
250 255 quotes). """
251 256
252 257 def usage():
253 258 print >> sys.stderr, main.__doc__ % sys.argv[0]
254 259 sys.exit(1)
255 260
256 261 # FIXME: rewrite this to at least use getopt
257 262 try:
258 263 if sys.argv[1] == '-s':
259 264 scheme_name = sys.argv[2]
260 265 del sys.argv[1:3]
261 266 else:
262 267 scheme_name = _scheme_default
263 268
264 269 except:
265 270 usage()
266 271
267 272 try:
268 273 fname = sys.argv[1]
269 274 except:
270 275 usage()
271 276
272 277 # write colorized version to stdout
273 278 parser = Parser()
274 279 try:
275 280 parser.format(file(fname).read(),scheme = scheme_name)
276 281 except IOError,msg:
277 282 # if user reads through a pager and quits, don't print traceback
278 283 if msg.args != (32,'Broken pipe'):
279 284 raise
280 285
281 286 if __name__ == "__main__":
282 287 main()
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now