##// END OF EJS Templates
remove code adding an extra newline
ernie french -
Show More
@@ -1,311 +1,306 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Class and program to colorize python source code for ANSI terminals.
3 Class and program to colorize python source code for ANSI terminals.
4
4
5 Based on an HTML code highlighter by Jurgen Hermann found at:
5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7
7
8 Modifications by Fernando Perez (fperez@colorado.edu).
8 Modifications by Fernando Perez (fperez@colorado.edu).
9
9
10 Information on the original HTML highlighter follows:
10 Information on the original HTML highlighter follows:
11
11
12 MoinMoin - Python Source Parser
12 MoinMoin - Python Source Parser
13
13
14 Title: Colorize Python source using the built-in tokenizer
14 Title: Colorize Python source using the built-in tokenizer
15
15
16 Submitter: Jurgen Hermann
16 Submitter: Jurgen Hermann
17 Last Updated:2001/04/06
17 Last Updated:2001/04/06
18
18
19 Version no:1.2
19 Version no:1.2
20
20
21 Description:
21 Description:
22
22
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 Python source code to HTML markup, rendering comments, keywords,
24 Python source code to HTML markup, rendering comments, keywords,
25 operators, numeric and string literals in different colors.
25 operators, numeric and string literals in different colors.
26
26
27 It shows how to use the built-in keyword, token and tokenize modules to
27 It shows how to use the built-in keyword, token and tokenize modules to
28 scan Python source code and re-emit it with no changes to its original
28 scan Python source code and re-emit it with no changes to its original
29 formatting (which is the hard part).
29 formatting (which is the hard part).
30 """
30 """
31
31
32 __all__ = ['ANSICodeColors','Parser']
32 __all__ = ['ANSICodeColors','Parser']
33
33
34 _scheme_default = 'Linux'
34 _scheme_default = 'Linux'
35
35
36 # Imports
36 # Imports
37 import StringIO
37 import StringIO
38 import keyword
38 import keyword
39 import os
39 import os
40 import optparse
40 import optparse
41 import sys
41 import sys
42 import token
42 import token
43 import tokenize
43 import tokenize
44
44
# Pick the tokenizer entry point that accepts a ``readline`` returning text.
generate_tokens = getattr(tokenize, 'generate_tokens', None)
if generate_tokens is None:
    # Python 3.  Note that we use the undocumented _tokenize because it
    # expects strings, not bytes.  See also Python issue #9969.
    generate_tokens = tokenize._tokenize
51
51
52 from IPython.utils.coloransi import *
52 from IPython.utils.coloransi import *
53
53
54 #############################################################################
54 #############################################################################
55 ### Python Source Parser (does Highlighting)
55 ### Python Source Parser (does Highlighting)
56 #############################################################################
56 #############################################################################
57
57
# Pseudo token types, numbered past the range used by the ``token`` module so
# they can share a color dictionary with the real token types.
_KEYWORD = token.NT_OFFSET + 1
_TEXT = token.NT_OFFSET + 2

#****************************************************************************
# Builtin color schemes

Colors = TermColors  # just a shorthand


def _make_scheme(name, number, op, string, comment, identifier, error,
                 keyword_color, text, normal):
    """Return a ColorScheme called `name` mapping token types to colors.

    `keyword_color` and `text` are stored under the _KEYWORD and _TEXT
    pseudo token types; `normal` is the "color off" sequence stored under
    the 'normal' key (usually Colors.Normal).
    """
    return ColorScheme(name, {
        token.NUMBER: number,
        token.OP: op,
        token.STRING: string,
        tokenize.COMMENT: comment,
        token.NAME: identifier,
        token.ERRORTOKEN: error,

        _KEYWORD: keyword_color,
        _TEXT: text,

        'normal': normal,
    })


# Build a few color schemes
NoColor = _make_scheme(
    'NoColor',
    number=Colors.NoColor,
    op=Colors.NoColor,
    string=Colors.NoColor,
    comment=Colors.NoColor,
    identifier=Colors.NoColor,
    error=Colors.NoColor,
    keyword_color=Colors.NoColor,
    text=Colors.NoColor,
    normal=Colors.NoColor,
)

LinuxColors = _make_scheme(
    'Linux',
    number=Colors.LightCyan,
    op=Colors.Yellow,
    string=Colors.LightBlue,
    comment=Colors.LightRed,
    identifier=Colors.Normal,
    error=Colors.Red,
    keyword_color=Colors.LightGreen,
    text=Colors.Yellow,
    normal=Colors.Normal,
)

LightBGColors = _make_scheme(
    'LightBG',
    number=Colors.Cyan,
    op=Colors.Blue,
    string=Colors.Blue,
    comment=Colors.Red,
    identifier=Colors.Normal,
    error=Colors.Red,
    keyword_color=Colors.Green,
    text=Colors.Blue,
    normal=Colors.Normal,
)

# Build table of color schemes (needed by the parser)
ANSICodeColors = ColorSchemeTable([NoColor, LinuxColors, LightBGColors],
                                  _scheme_default)
115
115
class Parser:
    """Format colored Python source.

    The instance doubles as the token handler: format2() tokenizes the source
    and passes every token to __call__(), which writes the token text wrapped
    in the color sequences of the active scheme to ``self.out``.
    """

    def __init__(self, color_table=None, out=sys.stdout):
        """Create a parser with a specified color table and output channel.

        color_table -- table of color schemes indexed by scheme name; any
            falsy value selects the module-level ANSICodeColors table.
        out -- object with a write() method, or the string 'str' to make
            format()/format2() return the colored text instead.

        Call format() to process code.
        """
        self.color_table = color_table or ANSICodeColors
        self.out = out

    def format(self, raw, out=None, scheme=''):
        """Colorize `raw`; return only the output element of format2()."""
        return self.format2(raw, out, scheme)[0]

    def format2(self, raw, out=None, scheme=''):
        """Parse and send the colored source.

        raw -- source text to colorize.
        out -- object with a write() method.  If it is None, the channel
            currently stored in ``self.out`` is used; otherwise it replaces
            ``self.out`` (and stays in place for later calls).  Optionally,
            out can be given as the string 'str' and the parser will return
            the output in a string.
        scheme -- name looked up in ``self.color_table``; 'NoColor' copies
            `raw` to the output unmodified without tokenizing it.

        Returns a tuple ``(output, error)``: `output` is the colored text in
        string mode and None otherwise; `error` is True when tokenizing
        stopped on a tokenize.TokenError.
        """
        string_output = False
        out_old = None
        if (out == 'str' or self.out == 'str'
                or isinstance(self.out, StringIO.StringIO)):
            # XXX - I don't really like this state handling logic, but at this
            # point I don't want to make major changes, so adding the
            # isinstance() check is the simplest I can do to ensure correct
            # behavior.
            out_old = self.out
            self.out = StringIO.StringIO()
            string_output = True
        elif out is not None:
            self.out = out

        try:
            # Fast return of the unmodified input for NoColor scheme
            if scheme == 'NoColor':
                self.out.write(raw)
                return (raw if string_output else None), False

            error = self._colorize(raw, scheme)
            if string_output:
                return self.out.getvalue(), error
            return None, error
        finally:
            # Always drop the temporary buffer again -- also on the NoColor
            # fast path and when an exception escapes -- so that string mode
            # requested for one call does not leak into the next one.
            if string_output:
                self.out = out_old

    def _colorize(self, raw, scheme):
        """Tokenize `raw` and write it colored to ``self.out``.

        Returns True if a tokenize.TokenError interrupted the tokenizer (an
        error report is written in that case), False otherwise.
        """
        # local shorthands
        colors = self.color_table[scheme].colors
        self.colors = colors  # put in object so __call__ sees it

        # Remove trailing whitespace and normalize tabs
        self.raw = raw.expandtabs().rstrip()

        # store line offsets in self.lines: tokenizer rows are 1-based, so
        # index 0 is padding and self.lines[row] is where that row starts.
        self.lines = [0, 0]
        pos = 0
        while True:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # parse the source and write it
        self.pos = 0
        text = StringIO.StringIO(self.raw)

        error = False
        try:
            for atoken in generate_tokens(text.readline):
                self(*atoken)
        except tokenize.TokenError as ex:
            msg = ex.args[0]
            line = ex.args[1][0]
            # Report the problem, followed by the source from the row named
            # in the exception up to the end of the input.
            self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
                           (colors[token.ERRORTOKEN],
                            msg, self.raw[self.lines[line]:],
                            colors.normal))
            error = True
        self.out.write(colors.normal + '\n')
        return error

    def __call__(self, toktype, toktext, start_pos, end_pos, line):
        """ Token handler, with syntax highlighting.

        Takes the five fields of a tokenizer token positionally.  The
        ``(row, col)`` pairs are unpacked in the body instead of in the
        signature, because tuple parameters are a syntax error on Python 3;
        only the start position is used.
        """
        srow, scol = start_pos

        # local shorthands
        colors = self.colors
        owrite = self.out.write

        # line separator, so this works across platforms
        linesep = os.linesep

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # send the original whitespace, if needed
        if newpos > oldpos:
            owrite(self.raw[oldpos:newpos])

        # skip indenting tokens
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # map token type to a color group
        if token.LPAR <= toktype <= token.OP:
            toktype = token.OP
        elif toktype == token.NAME and keyword.iskeyword(toktext):
            toktype = _KEYWORD
        color = colors.get(toktype, colors[_TEXT])

        # Triple quoted strings must be handled carefully so that backtracking
        # in pagers works correctly. We need color terminators on _each_ line.
        if linesep in toktext:
            toktext = toktext.replace(linesep, '%s%s%s' %
                                      (colors.normal, linesep, color))

        # send text
        owrite('%s%s%s' % (color, toktext, colors.normal))
246
def main(argv=None):
    """Run as a command-line script: colorize a python file or stdin using ANSI
    color escapes and print to stdout.

    Inputs:

      - argv(None): a list of strings like sys.argv[1:] giving the command-line
        arguments. If None, use sys.argv[1:].

    Exits with status 1 if the named file cannot be opened; option errors are
    reported through optparse's own error handling.
    """
    # Imported here because only this script entry point needs it.
    import errno

    usage_msg = """%prog [options] [filename]

Colorize a python file or stdin using ANSI color escapes and print to stdout.
If no filename is given, or if filename is -, read standard input."""

    opt_parser = optparse.OptionParser(usage=usage_msg)
    newopt = opt_parser.add_option
    newopt('-s', '--scheme', metavar='NAME', dest='scheme_name',
           action='store',
           choices=['Linux', 'LightBG', 'NoColor'], default=_scheme_default,
           help="give the color scheme to use. Currently only 'Linux'"
                " (default) and 'LightBG' and 'NoColor' are implemented"
                " (give without quotes)")

    opts, args = opt_parser.parse_args(argv)

    if len(args) > 1:
        opt_parser.error("you must give at most one filename.")

    # no filename given; setup to read from stdin
    fname = args[0] if args else '-'

    if fname == '-':
        stream = sys.stdin
    else:
        try:
            stream = open(fname)
        except IOError as msg:
            sys.stderr.write('%s\n' % (msg,))
            sys.exit(1)

    colorizer = Parser()

    # we need nested try blocks because pre-2.5 python doesn't support unified
    # try-except-finally
    try:
        try:
            # write colorized version to stdout
            colorizer.format(stream.read(), scheme=opts.scheme_name)
        except IOError as msg:
            # if user reads through a pager and quits, don't print traceback.
            # Compare the error number, not the platform-dependent message.
            if msg.errno != errno.EPIPE:
                raise
    finally:
        if stream is not sys.stdin:
            stream.close()  # in case a non-handled exception happened above


if __name__ == "__main__":
    main()
General Comments 0
You need to be logged in to leave comments. Login now