##// END OF EJS Templates
- Fix state handling bug in format(). Closes #146.
jdh2358 -
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -1,282 +1,287 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Class and program to colorize python source code for ANSI terminals.
3 Class and program to colorize python source code for ANSI terminals.
4
4
5 Based on an HTML code highlighter by Jurgen Hermann found at:
5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7
7
8 Modifications by Fernando Perez (fperez@colorado.edu).
8 Modifications by Fernando Perez (fperez@colorado.edu).
9
9
10 Information on the original HTML highlighter follows:
10 Information on the original HTML highlighter follows:
11
11
12 MoinMoin - Python Source Parser
12 MoinMoin - Python Source Parser
13
13
14 Title: Colorize Python source using the built-in tokenizer
14 Title: Colorize Python source using the built-in tokenizer
15
15
16 Submitter: Jurgen Hermann
16 Submitter: Jurgen Hermann
17 Last Updated:2001/04/06
17 Last Updated:2001/04/06
18
18
19 Version no:1.2
19 Version no:1.2
20
20
21 Description:
21 Description:
22
22
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 Python source code to HTML markup, rendering comments, keywords,
24 Python source code to HTML markup, rendering comments, keywords,
25 operators, numeric and string literals in different colors.
25 operators, numeric and string literals in different colors.
26
26
27 It shows how to use the built-in keyword, token and tokenize modules to
27 It shows how to use the built-in keyword, token and tokenize modules to
28 scan Python source code and re-emit it with no changes to its original
28 scan Python source code and re-emit it with no changes to its original
29 formatting (which is the hard part).
29 formatting (which is the hard part).
30
30
31 $Id: PyColorize.py 2225 2007-04-08 02:48:16Z jdh2358 $"""
31 $Id: PyColorize.py 2274 2007-04-26 14:41:43Z jdh2358 $"""
32
32
33 __all__ = ['ANSICodeColors','Parser']
33 __all__ = ['ANSICodeColors','Parser']
34
34
35 _scheme_default = 'Linux'
35 _scheme_default = 'Linux'
36
36
37 # Imports
37 # Imports
38 import cStringIO
38 import cStringIO
39 import keyword
39 import keyword
40 import os
40 import os
41 import string
41 import string
42 import sys
42 import sys
43 import token
43 import token
44 import tokenize
44 import tokenize
45
45
46 from IPython.ColorANSI import *
46 from IPython.ColorANSI import *
47
47
48 #############################################################################
48 #############################################################################
49 ### Python Source Parser (does Highlighting)
49 ### Python Source Parser (does Highlighting)
50 #############################################################################
50 #############################################################################
51
51
52 _KEYWORD = token.NT_OFFSET + 1
52 _KEYWORD = token.NT_OFFSET + 1
53 _TEXT = token.NT_OFFSET + 2
53 _TEXT = token.NT_OFFSET + 2
54
54
55 #****************************************************************************
55 #****************************************************************************
56 # Builtin color schemes
56 # Builtin color schemes
57
57
58 Colors = TermColors # just a shorthand
58 Colors = TermColors # just a shorthand
59
59
60 # Build a few color schemes
60 # Build a few color schemes
61 NoColor = ColorScheme(
61 NoColor = ColorScheme(
62 'NoColor',{
62 'NoColor',{
63 token.NUMBER : Colors.NoColor,
63 token.NUMBER : Colors.NoColor,
64 token.OP : Colors.NoColor,
64 token.OP : Colors.NoColor,
65 token.STRING : Colors.NoColor,
65 token.STRING : Colors.NoColor,
66 tokenize.COMMENT : Colors.NoColor,
66 tokenize.COMMENT : Colors.NoColor,
67 token.NAME : Colors.NoColor,
67 token.NAME : Colors.NoColor,
68 token.ERRORTOKEN : Colors.NoColor,
68 token.ERRORTOKEN : Colors.NoColor,
69
69
70 _KEYWORD : Colors.NoColor,
70 _KEYWORD : Colors.NoColor,
71 _TEXT : Colors.NoColor,
71 _TEXT : Colors.NoColor,
72
72
73 'normal' : Colors.NoColor # color off (usu. Colors.Normal)
73 'normal' : Colors.NoColor # color off (usu. Colors.Normal)
74 } )
74 } )
75
75
76 LinuxColors = ColorScheme(
76 LinuxColors = ColorScheme(
77 'Linux',{
77 'Linux',{
78 token.NUMBER : Colors.LightCyan,
78 token.NUMBER : Colors.LightCyan,
79 token.OP : Colors.Yellow,
79 token.OP : Colors.Yellow,
80 token.STRING : Colors.LightBlue,
80 token.STRING : Colors.LightBlue,
81 tokenize.COMMENT : Colors.LightRed,
81 tokenize.COMMENT : Colors.LightRed,
82 token.NAME : Colors.White,
82 token.NAME : Colors.White,
83 token.ERRORTOKEN : Colors.Red,
83 token.ERRORTOKEN : Colors.Red,
84
84
85 _KEYWORD : Colors.LightGreen,
85 _KEYWORD : Colors.LightGreen,
86 _TEXT : Colors.Yellow,
86 _TEXT : Colors.Yellow,
87
87
88 'normal' : Colors.Normal # color off (usu. Colors.Normal)
88 'normal' : Colors.Normal # color off (usu. Colors.Normal)
89 } )
89 } )
90
90
91 LightBGColors = ColorScheme(
91 LightBGColors = ColorScheme(
92 'LightBG',{
92 'LightBG',{
93 token.NUMBER : Colors.Cyan,
93 token.NUMBER : Colors.Cyan,
94 token.OP : Colors.Blue,
94 token.OP : Colors.Blue,
95 token.STRING : Colors.Blue,
95 token.STRING : Colors.Blue,
96 tokenize.COMMENT : Colors.Red,
96 tokenize.COMMENT : Colors.Red,
97 token.NAME : Colors.Black,
97 token.NAME : Colors.Black,
98 token.ERRORTOKEN : Colors.Red,
98 token.ERRORTOKEN : Colors.Red,
99
99
100 _KEYWORD : Colors.Green,
100 _KEYWORD : Colors.Green,
101 _TEXT : Colors.Blue,
101 _TEXT : Colors.Blue,
102
102
103 'normal' : Colors.Normal # color off (usu. Colors.Normal)
103 'normal' : Colors.Normal # color off (usu. Colors.Normal)
104 } )
104 } )
105
105
106 # Build table of color schemes (needed by the parser)
106 # Build table of color schemes (needed by the parser)
107 ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors],
107 ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors],
108 _scheme_default)
108 _scheme_default)
109
109
110 class Parser:
110 class Parser:
111 """ Format colored Python source.
111 """ Format colored Python source.
112 """
112 """
113
113
114 def __init__(self, color_table=None,out = sys.stdout):
114 def __init__(self, color_table=None,out = sys.stdout):
115 """ Create a parser with a specified color table and output channel.
115 """ Create a parser with a specified color table and output channel.
116
116
117 Call format() to process code.
117 Call format() to process code.
118 """
118 """
119 self.color_table = color_table and color_table or ANSICodeColors
119 self.color_table = color_table and color_table or ANSICodeColors
120 self.out = out
120 self.out = out
121
121
122 def format(self, raw, out = None, scheme = ''):
122 def format(self, raw, out = None, scheme = ''):
123 return self.format2(raw, out, scheme)[0]
123 return self.format2(raw, out, scheme)[0]
124
124
125 def format2(self, raw, out = None, scheme = ''):
125 def format2(self, raw, out = None, scheme = ''):
126 """ Parse and send the colored source.
126 """ Parse and send the colored source.
127
127
128 If out and scheme are not specified, the defaults (given to
128 If out and scheme are not specified, the defaults (given to
129 constructor) are used.
129 constructor) are used.
130
130
131 out should be a file-type object. Optionally, out can be given as the
131 out should be a file-type object. Optionally, out can be given as the
132 string 'str' and the parser will automatically return the output in a
132 string 'str' and the parser will automatically return the output in a
133 string."""
133 string."""
134
134
135 string_output = 0
135 string_output = 0
136 if out == 'str' or self.out == 'str':
136 if out == 'str' or self.out == 'str' or \
137 isinstance(self.out,cStringIO.OutputType):
138 # XXX - I don't really like this state handling logic, but at this
139 # point I don't want to make major changes, so adding the
140 # isinstance() check is the simplest I can do to ensure correct
141 # behavior.
137 out_old = self.out
142 out_old = self.out
138 self.out = cStringIO.StringIO()
143 self.out = cStringIO.StringIO()
139 string_output = 1
144 string_output = 1
140 elif out is not None:
145 elif out is not None:
141 self.out = out
146 self.out = out
142
147
143 # Fast return of the unmodified input for NoColor scheme
148 # Fast return of the unmodified input for NoColor scheme
144 if scheme == 'NoColor':
149 if scheme == 'NoColor':
145 error = False
150 error = False
146 self.out.write(raw)
151 self.out.write(raw)
147 if string_output:
152 if string_output:
148 return raw,error
153 return raw,error
149 else:
154 else:
150 return None,error
155 return None,error
151
156
152 # local shorthands
157 # local shorthands
153 colors = self.color_table[scheme].colors
158 colors = self.color_table[scheme].colors
154 self.colors = colors # put in object so __call__ sees it
159 self.colors = colors # put in object so __call__ sees it
155
160
156 # Remove trailing whitespace and normalize tabs
161 # Remove trailing whitespace and normalize tabs
157 self.raw = raw.expandtabs().rstrip()
162 self.raw = raw.expandtabs().rstrip()
158
163
159 # store line offsets in self.lines
164 # store line offsets in self.lines
160 self.lines = [0, 0]
165 self.lines = [0, 0]
161 pos = 0
166 pos = 0
162 raw_find = self.raw.find
167 raw_find = self.raw.find
163 lines_append = self.lines.append
168 lines_append = self.lines.append
164 while 1:
169 while 1:
165 pos = raw_find('\n', pos) + 1
170 pos = raw_find('\n', pos) + 1
166 if not pos: break
171 if not pos: break
167 lines_append(pos)
172 lines_append(pos)
168 lines_append(len(self.raw))
173 lines_append(len(self.raw))
169
174
170 # parse the source and write it
175 # parse the source and write it
171 self.pos = 0
176 self.pos = 0
172 text = cStringIO.StringIO(self.raw)
177 text = cStringIO.StringIO(self.raw)
173
178
174 error = False
179 error = False
175 try:
180 try:
176 tokenize.tokenize(text.readline, self)
181 tokenize.tokenize(text.readline, self)
177 except tokenize.TokenError, ex:
182 except tokenize.TokenError, ex:
178 msg = ex[0]
183 msg = ex[0]
179 line = ex[1][0]
184 line = ex[1][0]
180 self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
185 self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
181 (colors[token.ERRORTOKEN],
186 (colors[token.ERRORTOKEN],
182 msg, self.raw[self.lines[line]:],
187 msg, self.raw[self.lines[line]:],
183 colors.normal)
188 colors.normal)
184 )
189 )
185 error = True
190 error = True
186 self.out.write(colors.normal+'\n')
191 self.out.write(colors.normal+'\n')
187 if string_output:
192 if string_output:
188 output = self.out.getvalue()
193 output = self.out.getvalue()
189 self.out = out_old
194 self.out = out_old
190 return (output, error)
195 return (output, error)
191 return (None, error)
196 return (None, error)
192
197
193 def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
198 def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
194 """ Token handler, with syntax highlighting."""
199 """ Token handler, with syntax highlighting."""
195
200
196 # local shorthands
201 # local shorthands
197 colors = self.colors
202 colors = self.colors
198 owrite = self.out.write
203 owrite = self.out.write
199
204
200 # line separator, so this works across platforms
205 # line separator, so this works across platforms
201 linesep = os.linesep
206 linesep = os.linesep
202
207
203 # calculate new positions
208 # calculate new positions
204 oldpos = self.pos
209 oldpos = self.pos
205 newpos = self.lines[srow] + scol
210 newpos = self.lines[srow] + scol
206 self.pos = newpos + len(toktext)
211 self.pos = newpos + len(toktext)
207
212
208 # handle newlines
213 # handle newlines
209 if toktype in [token.NEWLINE, tokenize.NL]:
214 if toktype in [token.NEWLINE, tokenize.NL]:
210 owrite(linesep)
215 owrite(linesep)
211 return
216 return
212
217
213 # send the original whitespace, if needed
218 # send the original whitespace, if needed
214 if newpos > oldpos:
219 if newpos > oldpos:
215 owrite(self.raw[oldpos:newpos])
220 owrite(self.raw[oldpos:newpos])
216
221
217 # skip indenting tokens
222 # skip indenting tokens
218 if toktype in [token.INDENT, token.DEDENT]:
223 if toktype in [token.INDENT, token.DEDENT]:
219 self.pos = newpos
224 self.pos = newpos
220 return
225 return
221
226
222 # map token type to a color group
227 # map token type to a color group
223 if token.LPAR <= toktype and toktype <= token.OP:
228 if token.LPAR <= toktype and toktype <= token.OP:
224 toktype = token.OP
229 toktype = token.OP
225 elif toktype == token.NAME and keyword.iskeyword(toktext):
230 elif toktype == token.NAME and keyword.iskeyword(toktext):
226 toktype = _KEYWORD
231 toktype = _KEYWORD
227 color = colors.get(toktype, colors[_TEXT])
232 color = colors.get(toktype, colors[_TEXT])
228
233
229 #print '<%s>' % toktext, # dbg
234 #print '<%s>' % toktext, # dbg
230
235
231 # Triple quoted strings must be handled carefully so that backtracking
236 # Triple quoted strings must be handled carefully so that backtracking
232 # in pagers works correctly. We need color terminators on _each_ line.
237 # in pagers works correctly. We need color terminators on _each_ line.
233 if linesep in toktext:
238 if linesep in toktext:
234 toktext = toktext.replace(linesep, '%s%s%s' %
239 toktext = toktext.replace(linesep, '%s%s%s' %
235 (colors.normal,linesep,color))
240 (colors.normal,linesep,color))
236
241
237 # send text
242 # send text
238 owrite('%s%s%s' % (color,toktext,colors.normal))
243 owrite('%s%s%s' % (color,toktext,colors.normal))
239
244
240 def main():
245 def main():
241 """Colorize a python file using ANSI color escapes and print to stdout.
246 """Colorize a python file using ANSI color escapes and print to stdout.
242
247
243 Usage:
248 Usage:
244 %s [-s scheme] filename
249 %s [-s scheme] filename
245
250
246 Options:
251 Options:
247
252
248 -s scheme: give the color scheme to use. Currently only 'Linux'
253 -s scheme: give the color scheme to use. Currently only 'Linux'
249 (default) and 'LightBG' and 'NoColor' are implemented (give without
254 (default) and 'LightBG' and 'NoColor' are implemented (give without
250 quotes). """
255 quotes). """
251
256
252 def usage():
257 def usage():
253 print >> sys.stderr, main.__doc__ % sys.argv[0]
258 print >> sys.stderr, main.__doc__ % sys.argv[0]
254 sys.exit(1)
259 sys.exit(1)
255
260
256 # FIXME: rewrite this to at least use getopt
261 # FIXME: rewrite this to at least use getopt
257 try:
262 try:
258 if sys.argv[1] == '-s':
263 if sys.argv[1] == '-s':
259 scheme_name = sys.argv[2]
264 scheme_name = sys.argv[2]
260 del sys.argv[1:3]
265 del sys.argv[1:3]
261 else:
266 else:
262 scheme_name = _scheme_default
267 scheme_name = _scheme_default
263
268
264 except:
269 except:
265 usage()
270 usage()
266
271
267 try:
272 try:
268 fname = sys.argv[1]
273 fname = sys.argv[1]
269 except:
274 except:
270 usage()
275 usage()
271
276
272 # write colorized version to stdout
277 # write colorized version to stdout
273 parser = Parser()
278 parser = Parser()
274 try:
279 try:
275 parser.format(file(fname).read(),scheme = scheme_name)
280 parser.format(file(fname).read(),scheme = scheme_name)
276 except IOError,msg:
281 except IOError,msg:
277 # if user reads through a pager and quits, don't print traceback
282 # if user reads through a pager and quits, don't print traceback
278 if msg.args != (32,'Broken pipe'):
283 if msg.args != (32,'Broken pipe'):
279 raise
284 raise
280
285
281 if __name__ == "__main__":
286 if __name__ == "__main__":
282 main()
287 main()
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now