##// END OF EJS Templates
Fix bugs in PyColorize
Thomas Kluyver -
Show More
@@ -1,311 +1,311 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Class and program to colorize python source code for ANSI terminals.
4 4
5 5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7 7
8 8 Modifications by Fernando Perez (fperez@colorado.edu).
9 9
10 10 Information on the original HTML highlighter follows:
11 11
12 12 MoinMoin - Python Source Parser
13 13
14 14 Title: Colorize Python source using the built-in tokenizer
15 15
16 16 Submitter: Jurgen Hermann
17 17 Last Updated:2001/04/06
18 18
19 19 Version no:1.2
20 20
21 21 Description:
22 22
23 23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 24 Python source code to HTML markup, rendering comments, keywords,
25 25 operators, numeric and string literals in different colors.
26 26
27 27 It shows how to use the built-in keyword, token and tokenize modules to
28 28 scan Python source code and re-emit it with no changes to its original
29 29 formatting (which is the hard part).
30 30 """
31 31
32 32 __all__ = ['ANSICodeColors','Parser']
33 33
34 34 _scheme_default = 'Linux'
35 35
36 36 # Imports
37 37 import cStringIO
38 38 import keyword
39 39 import os
40 40 import optparse
41 41 import sys
42 42 import token
43 43 import tokenize
44 44
# Pick a tokenizer entry point that reads *text* lines through a readline
# callable taking no arguments and returning one line per call.
try:
    generate_tokens = tokenize.generate_tokens
except AttributeError:
    # Python 3 fallback.  We use the undocumented _tokenize because it expects
    # strings, not bytes.  See also Python issue #9969.
    # _tokenize takes (readline, encoding); passing None for the encoding
    # keeps the one-argument call made by Parser.format2 working.
    # NOTE(review): _tokenize is private to the stdlib -- confirm it exists on
    # every interpreter version this fallback is meant to serve.
    def generate_tokens(readline):
        """Tokenize text lines using the stdlib's private str tokenizer."""
        return tokenize._tokenize(readline, None)
51 51
52 52 from IPython.utils.coloransi import *
53 53
54 54 #############################################################################
55 55 ### Python Source Parser (does Highlighting)
56 56 #############################################################################
57 57
# Pseudo token types used only by this module, numbered from token.NT_OFFSET
# upwards.
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

Colors = TermColors  # just a shorthand


def _build_scheme(name, number, op, string, comment, ident, error,
                  kw, text, normal):
    """Return a ColorScheme called `name` mapping each token group to a color.

    `ident` colors plain names, `kw` colors Python keywords, `text` is the
    color stored under the _TEXT pseudo token and `normal` is the color-off
    sequence.
    """
    return ColorScheme(name, {
        token.NUMBER: number,
        token.OP: op,
        token.STRING: string,
        tokenize.COMMENT: comment,
        token.NAME: ident,
        token.ERRORTOKEN: error,

        _KEYWORD: kw,
        _TEXT: text,

        'normal': normal,  # color off (usu. Colors.Normal)
    })


# Build a few color schemes
NoColor = _build_scheme(
    'NoColor',
    number=Colors.NoColor,
    op=Colors.NoColor,
    string=Colors.NoColor,
    comment=Colors.NoColor,
    ident=Colors.NoColor,
    error=Colors.NoColor,
    kw=Colors.NoColor,
    text=Colors.NoColor,
    normal=Colors.NoColor,
)

LinuxColors = _build_scheme(
    'Linux',
    number=Colors.LightCyan,
    op=Colors.Yellow,
    string=Colors.LightBlue,
    comment=Colors.LightRed,
    ident=Colors.Normal,
    error=Colors.Red,
    kw=Colors.LightGreen,
    text=Colors.Yellow,
    normal=Colors.Normal,
)

LightBGColors = _build_scheme(
    'LightBG',
    number=Colors.Cyan,
    op=Colors.Blue,
    string=Colors.Blue,
    comment=Colors.Red,
    ident=Colors.Normal,
    error=Colors.Red,
    kw=Colors.Green,
    text=Colors.Blue,
    normal=Colors.Normal,
)

# Build table of color schemes (needed by the parser)
ANSICodeColors = ColorSchemeTable([NoColor, LinuxColors, LightBGColors],
                                  _scheme_default)
115 115
class Parser:
    """ Format colored Python source.

    An instance is also the per-token callback: format2() feeds every token
    produced by the tokenizer to __call__, which writes it to self.out wrapped
    in the color escapes of the active scheme.
    """

    def __init__(self, color_table=None,out = sys.stdout):
        """ Create a parser with a specified color table and output channel.

        color_table: table of color schemes indexed by scheme name; when it is
        None (or otherwise false) the module-level ANSICodeColors is used.

        out: file-like object receiving the output, or the string 'str' to
        have format()/format2() return the result instead.  Note that the
        default is the sys.stdout that was current when the class was defined.

        Call format() to process code.
        """
        self.color_table = color_table or ANSICodeColors
        self.out = out

    def format(self, raw, out = None, scheme = ''):
        """ Colorize `raw` and return only the output part of format2().

        The error flag computed by format2() is discarded.
        """
        return self.format2(raw, out, scheme)[0]

    def format2(self, raw, out = None, scheme = ''):
        """ Parse and send the colored source.

        If out and scheme are not specified, the defaults (given to
        constructor) are used.

        out should be a file-type object. Optionally, out can be given as the
        string 'str' and the parser will automatically return the output in a
        string.

        Returns a tuple (output, error).  output is the colored text when
        string output was requested and None otherwise; error is True when the
        tokenizer raised TokenError while parsing `raw`.
        """
        # NOTE(review): the default scheme '' is looked up in the color table
        # as given -- confirm the table can resolve it.

        string_output = False
        if out == 'str' or self.out == 'str' or \
           isinstance(self.out,cStringIO.OutputType):
            # XXX - I don't really like this state handling logic, but at this
            # point I don't want to make major changes, so adding the
            # isinstance() check is the simplest I can do to ensure correct
            # behavior.
            out_old = self.out
            self.out = cStringIO.StringIO()
            string_output = True
        elif out is not None:
            self.out = out

        try:
            # Fast return of the unmodified input for NoColor scheme
            if scheme == 'NoColor':
                self.out.write(raw)
                if string_output:
                    return raw, False
                return None, False

            error = self._colorize(raw, scheme)
            if string_output:
                return (self.out.getvalue(), error)
            return (None, error)
        finally:
            # Always put the caller's output channel back, on every return
            # path and on exceptions, so the temporary buffer never leaks
            # into later calls.
            if string_output:
                self.out = out_old

    def _colorize(self, raw, scheme):
        """ Tokenize `raw` and write it, colored with `scheme`, to self.out.

        Returns True if the tokenizer raised TokenError, False otherwise.
        """
        # local shorthands
        colors = self.color_table[scheme].colors
        self.colors = colors # put in object so __call__ sees it

        # Remove trailing whitespace and normalize tabs
        self.raw = raw.expandtabs().rstrip()

        # store line offsets in self.lines; token rows are 1-based, hence the
        # leading dummy entry
        self.lines = [0, 0]
        pos = 0
        while True:
            # find() returns -1 when no newline is left, which makes pos 0
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        text = cStringIO.StringIO(self.raw)

        error = False
        try:
            # The loop variable must not be called `token`: that would shadow
            # the token module, which the error handler below still needs.
            for atoken in generate_tokens(text.readline):
                self(*atoken)
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            line = ex.args[1][0]
            self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                           (colors[token.ERRORTOKEN],
                            msg, self.raw[self.lines[line]:],
                            colors.normal)
                           )
            error = True
        self.out.write(colors.normal+'\n')
        return error

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """ Token handler, with syntax highlighting.

        Receives the five fields of one tokenizer token: its type, its text,
        the (row, col) pairs where it starts and ends, and the source line.
        Only the type, text and start position are used.
        """
        # Plain parameters unpacked here instead of tuple parameters, which
        # Python 3 no longer accepts in a function signature.
        srow, scol = start_pos

        # local shorthands
        colors = self.colors
        owrite = self.out.write

        # line separator, so this works across platforms
        linesep = os.linesep

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # handle newlines
        if toktype in [token.NEWLINE, tokenize.NL]:
            owrite(linesep)
            return

        # send the original whitespace, if needed
        if newpos > oldpos:
            owrite(self.raw[oldpos:newpos])

        # skip indenting tokens
        if toktype in [token.INDENT, token.DEDENT]:
            self.pos = newpos
            return

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly. We need color terminators on _each_ line.
        if linesep in toktext:
            toktext = toktext.replace(linesep, '%s%s%s' %
                                      (colors.normal,linesep,color))

        # send text
        owrite('%s%s%s' % (color,toktext,colors.normal))
251 251
def main(argv=None):
    """Run as a command-line script: colorize a python file or stdin using ANSI
    color escapes and print to stdout.

    Inputs:

      - argv(None): a list of strings like sys.argv[1:] giving the command-line
        arguments. If None, use sys.argv[1:].

    Exits with status 1 (after printing the error to stderr) when the named
    file cannot be opened.
    """
    # Local import: only the broken-pipe check below needs it.
    import errno

    usage_msg = """%prog [options] [filename]

Colorize a python file or stdin using ANSI color escapes and print to stdout.
If no filename is given, or if filename is -, read standard input."""

    opt_parser = optparse.OptionParser(usage=usage_msg)
    opt_parser.add_option(
        '-s','--scheme',metavar='NAME',dest='scheme_name',action='store',
        choices=['Linux','LightBG','NoColor'],default=_scheme_default,
        help="give the color scheme to use. Currently only 'Linux' "
             "(default) and 'LightBG' and 'NoColor' are implemented (give "
             "without quotes)")

    opts,args = opt_parser.parse_args(argv)

    if len(args) > 1:
        opt_parser.error("you must give at most one filename.")

    if len(args) == 0:
        fname = '-' # no filename given; setup to read from stdin
    else:
        fname = args[0]

    if fname == '-':
        stream = sys.stdin
    else:
        try:
            stream = open(fname)
        except IOError as msg:
            sys.stderr.write('%s\n' % msg)
            sys.exit(1)

    colorizer = Parser()

    try:
        # write colorized version to stdout
        colorizer.format(stream.read(),scheme=opts.scheme_name)
    except IOError as msg:
        # if user reads through a pager and quits, don't print traceback.
        # Compare the error number rather than the platform's message text.
        if msg.errno != errno.EPIPE:
            raise
    finally:
        if stream is not sys.stdin:
            stream.close() # in case a non-handled exception happened above
309 309
310 310 if __name__ == "__main__":
311 311 main()
General Comments 0
You need to be logged in to leave comments. Login now