##// END OF EJS Templates
Defer import of optparse
Thomas Kluyver -
Show More
@@ -1,310 +1,310 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Class and program to colorize python source code for ANSI terminals.
3 Class and program to colorize python source code for ANSI terminals.
4
4
5 Based on an HTML code highlighter by Jurgen Hermann found at:
5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7
7
8 Modifications by Fernando Perez (fperez@colorado.edu).
8 Modifications by Fernando Perez (fperez@colorado.edu).
9
9
10 Information on the original HTML highlighter follows:
10 Information on the original HTML highlighter follows:
11
11
12 MoinMoin - Python Source Parser
12 MoinMoin - Python Source Parser
13
13
14 Title: Colorize Python source using the built-in tokenizer
14 Title: Colorize Python source using the built-in tokenizer
15
15
16 Submitter: Jurgen Hermann
16 Submitter: Jurgen Hermann
17 Last Updated:2001/04/06
17 Last Updated:2001/04/06
18
18
19 Version no:1.2
19 Version no:1.2
20
20
21 Description:
21 Description:
22
22
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 Python source code to HTML markup, rendering comments, keywords,
24 Python source code to HTML markup, rendering comments, keywords,
25 operators, numeric and string literals in different colors.
25 operators, numeric and string literals in different colors.
26
26
27 It shows how to use the built-in keyword, token and tokenize modules to
27 It shows how to use the built-in keyword, token and tokenize modules to
28 scan Python source code and re-emit it with no changes to its original
28 scan Python source code and re-emit it with no changes to its original
29 formatting (which is the hard part).
29 formatting (which is the hard part).
30 """
30 """
31 from __future__ import print_function
31 from __future__ import print_function
32
32
33 from __future__ import unicode_literals
33 from __future__ import unicode_literals
34
34
# Names exported by ``from <this module> import *``.
__all__ = [
    'ANSICodeColors',
    'Parser',
]

# Scheme name used as the default of the command-line --scheme option and as
# the default scheme of the ANSICodeColors table.
_scheme_default = 'Linux'
38
38
39
39
40 # Imports
40 # Imports
41 import StringIO
41 import StringIO
42 import keyword
42 import keyword
43 import os
43 import os
44 import optparse
45 import sys
44 import sys
46 import token
45 import token
47 import tokenize
46 import tokenize
48
47
# Pick the tokenizer entry point that reads text (str) lines.  When the
# tokenize module has no generate_tokens attribute, fall back to the
# undocumented tokenize._tokenize, which expects strings rather than bytes
# (see also Python issue #9969).
generate_tokens = getattr(tokenize, 'generate_tokens', None)
if generate_tokens is None:
    generate_tokens = tokenize._tokenize
55
54
56 from IPython.utils.coloransi import *
55 from IPython.utils.coloransi import *
57
56
58 #############################################################################
57 #############################################################################
59 ### Python Source Parser (does Hilighting)
58 ### Python Source Parser (does Hilighting)
60 #############################################################################
59 #############################################################################
61
60
# Extra token categories used only by this module, numbered upwards from
# token.NT_OFFSET:
#   _KEYWORD - a NAME token whose text is a Python keyword
#   _TEXT    - fallback category for token types without a color of their own
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

# Shorthand for the terminal color table.
Colors = TermColors

# Every scheme maps a token category to the escape sequence that switches its
# color on; the 'normal' entry is the sequence that switches color off again
# (usually Colors.Normal).

# Scheme that emits no color at all.
NoColor = ColorScheme('NoColor', {
    token.NUMBER: Colors.NoColor,
    token.OP: Colors.NoColor,
    token.STRING: Colors.NoColor,
    tokenize.COMMENT: Colors.NoColor,
    token.NAME: Colors.NoColor,
    token.ERRORTOKEN: Colors.NoColor,

    _KEYWORD: Colors.NoColor,
    _TEXT: Colors.NoColor,

    'normal': Colors.NoColor,
})

# Scheme built from the light color variants.
LinuxColors = ColorScheme('Linux', {
    token.NUMBER: Colors.LightCyan,
    token.OP: Colors.Yellow,
    token.STRING: Colors.LightBlue,
    tokenize.COMMENT: Colors.LightRed,
    token.NAME: Colors.Normal,
    token.ERRORTOKEN: Colors.Red,

    _KEYWORD: Colors.LightGreen,
    _TEXT: Colors.Yellow,

    'normal': Colors.Normal,
})

# Scheme built from the dark color variants.
LightBGColors = ColorScheme('LightBG', {
    token.NUMBER: Colors.Cyan,
    token.OP: Colors.Blue,
    token.STRING: Colors.Blue,
    tokenize.COMMENT: Colors.Red,
    token.NAME: Colors.Normal,
    token.ERRORTOKEN: Colors.Red,

    _KEYWORD: Colors.Green,
    _TEXT: Colors.Blue,

    'normal': Colors.Normal,
})

# Table of all builtin schemes (needed by the parser).
ANSICodeColors = ColorSchemeTable(
    [NoColor, LinuxColors, LightBGColors],
    _scheme_default,
)
119
118
class Parser:
    """Format colored Python source.

    The instance doubles as the token handler: format2() tokenizes the source
    and passes every token to __call__(), which writes the token text to the
    output channel wrapped in the color of its token category.
    """

    def __init__(self, color_table=None, out=sys.stdout):
        """Create a parser with a specified color table and output channel.

        color_table -- table of color schemes, indexed by scheme name.  Any
            false value (such as the default None) selects the module-level
            ANSICodeColors table.
        out -- default output channel: a file-type object, or the string
            'str' to have format()/format2() return the colored text.

        Call format() to process code.
        """
        self.color_table = color_table or ANSICodeColors
        self.out = out

    def format(self, raw, out=None, scheme=''):
        """Colorize raw and return only the output part of format2().

        Takes the same arguments as format2(); the error flag of its result
        is discarded.
        """
        return self.format2(raw, out, scheme)[0]

    def format2(self, raw, out=None, scheme=''):
        """Parse and send the colored source.

        raw -- the Python source text to colorize.
        out -- output channel for this call.  If not specified, the channel
            given to the constructor is used.  It should be a file-type
            object; optionally it can be given as the string 'str' and the
            parser will return the output in a string.  A file-type object
            given here also becomes the channel used by later calls.
        scheme -- name of the color scheme, looked up in the color table.
            'NoColor' short-circuits and emits raw unmodified.

        Returns a tuple (output, error).  output is the colored text when
        string output is in effect and None otherwise; error is True if the
        tokenizer raised TokenError while scanning raw.
        """
        # String output is in effect when requested for this call, when the
        # default channel is 'str', or when the default channel is itself a
        # StringIO instance.
        string_output = (out == 'str' or self.out == 'str' or
                         isinstance(self.out, StringIO.StringIO))
        out_old = self.out
        if string_output:
            # Collect into a scratch buffer for the duration of this call.
            self.out = StringIO.StringIO()
        elif out is not None:
            self.out = out

        try:
            # Fast return of the unmodified input for NoColor scheme
            if scheme == 'NoColor':
                if string_output:
                    return raw, False
                self.out.write(raw)
                return None, False

            error = self._write_colored(raw, scheme)
            if string_output:
                return self.out.getvalue(), error
            return None, error
        finally:
            # Always put the caller's channel back.  Without this, the
            # NoColor fast path and any exception escaping the tokenizer left
            # the scratch buffer installed, silently turning later calls into
            # string-output calls.
            if string_output:
                self.out = out_old

    def _write_colored(self, raw, scheme):
        """Tokenize raw and write it to self.out using the named scheme.

        Returns True if the tokenizer raised TokenError, False otherwise.
        """
        # local shorthands
        colors = self.color_table[scheme].colors
        self.colors = colors  # put in object so __call__ sees it

        # Remove trailing whitespace and normalize tabs
        self.raw = raw.expandtabs().rstrip()

        # Store line offsets in self.lines: entry `row` is the offset in
        # self.raw at which line `row` begins, as indexed by the row numbers
        # of the token positions handed to __call__.  The list starts with
        # two zeros and ends with the total length of the text.
        offsets = [0, 0]
        newline = self.raw.find('\n')
        while newline != -1:
            offsets.append(newline + 1)
            newline = self.raw.find('\n', newline + 1)
        offsets.append(len(self.raw))
        self.lines = offsets

        # parse the source and write it
        self.pos = 0
        text = StringIO.StringIO(self.raw)

        error = False
        try:
            for atoken in generate_tokens(text.readline):
                self(*atoken)
        except tokenize.TokenError as ex:
            # Report the problem and dump the rest of the source, starting at
            # the line number carried in the exception, in the error color.
            msg = ex.args[0]
            line = ex.args[1][0]
            self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                           (colors[token.ERRORTOKEN],
                            msg, self.raw[self.lines[line]:],
                            colors.normal))
            error = True
        self.out.write(colors.normal + '\n')
        return error

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """Token handler, with syntax highlighting.

        Called with the five fields of one token.  Only toktype, toktext and
        start_pos are used; end_pos and line are accepted so that a token can
        be unpacked straight into the call.
        """
        srow, scol = start_pos
        colors = self.colors
        owrite = self.out.write

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # send the original whitespace, if needed
        if newpos > oldpos:
            owrite(self.raw[oldpos:newpos])

        # skip indenting tokens
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly.  We need color terminators on _each_
        # line.  Lines are split on '\n' here because that is what the line
        # offset table is built on; os.linesep is '\r\n' on Windows and would
        # not match text that only contains '\n'.
        if '\n' in toktext:
            toktext = toktext.replace('\n',
                                      '%s\n%s' % (colors.normal, color))

        # send text
        owrite('%s%s%s' % (color, toktext, colors.normal))
250
249
def main(argv=None):
    """Run as a command-line script: colorize a python file or stdin using ANSI
    color escapes and print to stdout.

    Inputs:

      - argv(None): a list of strings like sys.argv[1:] giving the command-line
        arguments. If None, use sys.argv[1:].

    Exits with status 1 if the named file cannot be opened.  An IOError raised
    while formatting is swallowed only when it is a broken pipe.
    """
    # Imported here rather than at module level: only the command-line entry
    # point needs them.
    import errno
    import optparse

    usage_msg = """%prog [options] [filename]

Colorize a python file or stdin using ANSI color escapes and print to stdout.
If no filename is given, or if filename is -, read standard input."""

    option_parser = optparse.OptionParser(usage=usage_msg)
    option_parser.add_option(
        '-s', '--scheme', metavar='NAME', dest='scheme_name', action='store',
        choices=['Linux', 'LightBG', 'NoColor'], default=_scheme_default,
        help="give the color scheme to use. Currently only 'Linux'"
             " (default) and 'LightBG' and 'NoColor' are implemented (give"
             " without quotes)")

    opts, args = option_parser.parse_args(argv)

    if len(args) > 1:
        option_parser.error("you must give at most one filename.")

    # no filename given; setup to read from stdin
    fname = args[0] if args else '-'

    if fname == '-':
        stream = sys.stdin
    else:
        try:
            stream = open(fname)
        except IOError as msg:
            print(msg, file=sys.stderr)
            sys.exit(1)

    colorizer = Parser()

    try:
        # write colorized version to stdout
        colorizer.format(stream.read(), scheme=opts.scheme_name)
    except IOError as err:
        # if user reads through a pager and quits, don't print traceback.
        # Compare the error number, not the message text: the text comes from
        # the platform and is not guaranteed to read 'Broken pipe'.
        if err.errno != errno.EPIPE:
            raise
    finally:
        if stream is not sys.stdin:
            stream.close()  # in case a non-handled exception happened above
308
308
# Run the command-line colorizer only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
General Comments 0
You need to be logged in to leave comments. Login now