##// END OF EJS Templates
don't insert extra newline for newline tokens.
Ernie French -
Show More
@@ -1,311 +1,311
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Class and program to colorize python source code for ANSI terminals.
3 Class and program to colorize python source code for ANSI terminals.
4
4
5 Based on an HTML code highlighter by Jurgen Hermann found at:
5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7
7
8 Modifications by Fernando Perez (fperez@colorado.edu).
8 Modifications by Fernando Perez (fperez@colorado.edu).
9
9
10 Information on the original HTML highlighter follows:
10 Information on the original HTML highlighter follows:
11
11
12 MoinMoin - Python Source Parser
12 MoinMoin - Python Source Parser
13
13
14 Title: Colorize Python source using the built-in tokenizer
14 Title: Colorize Python source using the built-in tokenizer
15
15
16 Submitter: Jurgen Hermann
16 Submitter: Jurgen Hermann
17 Last Updated:2001/04/06
17 Last Updated:2001/04/06
18
18
19 Version no:1.2
19 Version no:1.2
20
20
21 Description:
21 Description:
22
22
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 Python source code to HTML markup, rendering comments, keywords,
24 Python source code to HTML markup, rendering comments, keywords,
25 operators, numeric and string literals in different colors.
25 operators, numeric and string literals in different colors.
26
26
27 It shows how to use the built-in keyword, token and tokenize modules to
27 It shows how to use the built-in keyword, token and tokenize modules to
28 scan Python source code and re-emit it with no changes to its original
28 scan Python source code and re-emit it with no changes to its original
29 formatting (which is the hard part).
29 formatting (which is the hard part).
30 """
30 """
31
31
32 __all__ = ['ANSICodeColors','Parser']
32 __all__ = ['ANSICodeColors','Parser']
33
33
34 _scheme_default = 'Linux'
34 _scheme_default = 'Linux'
35
35
36 # Imports
36 # Imports
37 import StringIO
37 import StringIO
38 import keyword
38 import keyword
39 import os
39 import os
40 import optparse
40 import optparse
41 import sys
41 import sys
42 import token
42 import token
43 import tokenize
43 import tokenize
44
44
# Pick the tokenizer entry point.  When the tokenize module offers
# generate_tokens it is used directly; otherwise (the Python 3 case this
# fallback was written for) the undocumented _tokenize is used because it
# expects strings, not bytes.  See also Python issue #9969.
generate_tokens = getattr(tokenize, 'generate_tokens', None)
if generate_tokens is None:
    generate_tokens = tokenize._tokenize
51
51
52 from IPython.utils.coloransi import *
52 from IPython.utils.coloransi import *
53
53
54 #############################################################################
54 #############################################################################
55 ### Python Source Parser (does Highlighting)
55 ### Python Source Parser (does Highlighting)
56 #############################################################################
56 #############################################################################
57
57
# Extra token types used only by this module: one for Python keywords and one
# for any text that has no color of its own.  Both are numbered just above
# token.NT_OFFSET.
_KEYWORD, _TEXT = token.NT_OFFSET + 1, token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

Colors = TermColors  # just a shorthand


def _make_scheme(name, number, op, string, comment, ident, error,
                 kwd, text, normal):
    """Build the ColorScheme called `name` from one color per token group."""
    return ColorScheme(name, {
        token.NUMBER: number,
        token.OP: op,
        token.STRING: string,
        tokenize.COMMENT: comment,
        token.NAME: ident,
        token.ERRORTOKEN: error,

        _KEYWORD: kwd,
        _TEXT: text,

        'normal': normal,  # color off (usu. Colors.Normal)
    })


# Every token group maps to the "no color" code.
NoColor = _make_scheme(
    'NoColor',
    number=Colors.NoColor,
    op=Colors.NoColor,
    string=Colors.NoColor,
    comment=Colors.NoColor,
    ident=Colors.NoColor,
    error=Colors.NoColor,
    kwd=Colors.NoColor,
    text=Colors.NoColor,
    normal=Colors.NoColor,
)

LinuxColors = _make_scheme(
    'Linux',
    number=Colors.LightCyan,
    op=Colors.Yellow,
    string=Colors.LightBlue,
    comment=Colors.LightRed,
    ident=Colors.Normal,
    error=Colors.Red,
    kwd=Colors.LightGreen,
    text=Colors.Yellow,
    normal=Colors.Normal,
)

LightBGColors = _make_scheme(
    'LightBG',
    number=Colors.Cyan,
    op=Colors.Blue,
    string=Colors.Blue,
    comment=Colors.Red,
    ident=Colors.Normal,
    error=Colors.Red,
    kwd=Colors.Green,
    text=Colors.Blue,
    normal=Colors.Normal,
)

# Table of the color schemes above (needed by the parser); the second
# argument names the default scheme.
ANSICodeColors = ColorSchemeTable(
    [NoColor, LinuxColors, LightBGColors],
    _scheme_default)
115
115
class Parser:
    """ Format colored Python source.

    The source is tokenized and written back out token by token, each token
    wrapped in the color that the selected scheme assigns to its type.  The
    text in between tokens (whitespace) is copied from the input as is.
    """

    def __init__(self, color_table=None,out = sys.stdout):
        """ Create a parser with a specified color table and output channel.

        color_table: table of color schemes, indexed by scheme name.  Any
            false value selects the module-level ANSICodeColors table.
        out: object with a write() method that receives the output, or the
            string 'str' to make format()/format2() return the output.

        Call format() to process code.
        """
        self.color_table = color_table or ANSICodeColors
        self.out = out

    def format(self, raw, out = None, scheme = ''):
        """ Colorize raw like format2() and return only its output part."""
        return self.format2(raw, out, scheme)[0]

    def format2(self, raw, out = None, scheme = ''):
        """ Parse and send the colored source.

        If out and scheme are not specified, the defaults (given to
        constructor) are used.

        out should be a file-type object. Optionally, out can be given as the
        string 'str' and the parser will automatically return the output in a
        string.

        Returns a tuple (output, error).  output is the generated text when
        string output is in effect and None otherwise; error is True when
        tokenizing was cut short by a tokenize.TokenError.
        """

        # XXX - I don't really like this state handling logic, but at this
        # point I don't want to make major changes, so adding the
        # isinstance() check is the simplest I can do to ensure correct
        # behavior.
        string_output = (out == 'str' or self.out == 'str' or
                         isinstance(self.out, StringIO.StringIO))
        out_old = self.out
        if string_output:
            # collect everything in a scratch buffer for the duration of
            # this call only
            self.out = StringIO.StringIO()
        elif out is not None:
            self.out = out

        try:
            # Fast return of the unmodified input for NoColor scheme
            if scheme == 'NoColor':
                self.out.write(raw)
                return (raw if string_output else None), False

            # local shorthands
            colors = self.color_table[scheme].colors
            self.colors = colors # put in object so __call__ sees it

            # Remove trailing whitespace and normalize tabs
            self.raw = raw.expandtabs().rstrip()

            # Store line offsets in self.lines: self.lines[row] is the
            # offset in self.raw at which row `row` starts.  __call__ indexes
            # it with the tokenizer's row numbers, hence the extra leading 0.
            self.lines = [0, 0]
            pos = 0
            while True:
                pos = self.raw.find('\n', pos) + 1
                if not pos:
                    break
                self.lines.append(pos)
            self.lines.append(len(self.raw))

            # parse the source and write it
            self.pos = 0
            text = StringIO.StringIO(self.raw)

            error = False
            try:
                for atoken in generate_tokens(text.readline):
                    self(*atoken)
            except tokenize.TokenError as ex:
                # report the problem, followed by the source from the
                # offending line onwards, in the error color
                msg = ex.args[0]
                line = ex.args[1][0]
                self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                               (colors[token.ERRORTOKEN],
                                msg, self.raw[self.lines[line]:],
                                colors.normal)
                               )
                error = True
            self.out.write(colors.normal+'\n')
            if string_output:
                return (self.out.getvalue(), error)
            return (None, error)
        finally:
            # Put the caller's output channel back on every exit path,
            # including the NoColor shortcut and unexpected exceptions, so
            # the scratch buffer never leaks into later calls.
            if string_output:
                self.out = out_old

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """ Token handler, with syntax highlighting.

        Called with the five fields of one token: its type, its text, its
        (row, column) start position, its (row, column) end position and the
        source line it came from.  Only the type, the text and the start
        position are used.
        """
        srow, scol = start_pos

        # local shorthands
        colors = self.colors
        owrite = self.out.write

        # line separator, so this works across platforms
        linesep = os.linesep

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # Newline tokens get no special treatment: their own text is written
        # like that of any other token, so no extra newline is inserted.

        # send the original whitespace, if needed
        if newpos > oldpos:
            owrite(self.raw[oldpos:newpos])

        # skip indenting tokens
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly. We need color terminators on _each_ line.
        if linesep in toktext:
            toktext = toktext.replace(linesep, '%s%s%s' %
                                      (colors.normal,linesep,color))

        # send text
        owrite('%s%s%s' % (color,toktext,colors.normal))
def main(argv=None):
    """Run as a command-line script: colorize a python file or stdin using ANSI
    color escapes and print to stdout.

    Inputs:

      - argv(None): a list of strings like sys.argv[1:] giving the command-line
        arguments. If None, use sys.argv[1:].

    Exits with status 1 if the named file cannot be opened.
    """
    import errno  # only needed here, for the broken-pipe check

    usage_msg = """%prog [options] [filename]

Colorize a python file or stdin using ANSI color escapes and print to stdout.
If no filename is given, or if filename is -, read standard input."""

    opt_parser = optparse.OptionParser(usage=usage_msg)
    opt_parser.add_option(
        '-s', '--scheme', metavar='NAME', dest='scheme_name', action='store',
        choices=['Linux', 'LightBG', 'NoColor'], default=_scheme_default,
        help="give the color scheme to use. Currently only 'Linux'"
             " (default) and 'LightBG' and 'NoColor' are implemented (give"
             " without quotes)")

    opts, args = opt_parser.parse_args(argv)

    if len(args) > 1:
        opt_parser.error("you must give at most one filename.")

    # no filename given means: read from stdin
    fname = args[0] if args else '-'

    if fname == '-':
        stream = sys.stdin
    else:
        try:
            stream = open(fname)
        except IOError as err:
            sys.stderr.write('%s\n' % err)
            sys.exit(1)

    colorizer = Parser()

    try:
        # write colorized version to stdout
        colorizer.format(stream.read(), scheme=opts.scheme_name)
    except IOError as err:
        # if user reads through a pager and quits, don't print traceback;
        # test the error number, since the message text is not a reliable key
        if err.errno != errno.EPIPE:
            raise
    finally:
        if stream is not sys.stdin:
            stream.close() # in case a non-handled exception happened above


if __name__ == "__main__":
    main()
General Comments 0
You need to be logged in to leave comments. Login now