##// END OF EJS Templates
Some cleanup of Pycolorize....
Matthias Bussonnier -
Show More
@@ -1,331 +1,331 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Class and program to colorize python source code for ANSI terminals.
3 Class and program to colorize python source code for ANSI terminals.
4
4
5 Based on an HTML code highlighter by Jurgen Hermann found at:
5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7
7
8 Modifications by Fernando Perez (fperez@colorado.edu).
8 Modifications by Fernando Perez (fperez@colorado.edu).
9
9
10 Information on the original HTML highlighter follows:
10 Information on the original HTML highlighter follows:
11
11
12 MoinMoin - Python Source Parser
12 MoinMoin - Python Source Parser
13
13
14 Title: Colorize Python source using the built-in tokenizer
14 Title: Colorize Python source using the built-in tokenizer
15
15
16 Submitter: Jurgen Hermann
16 Submitter: Jurgen Hermann
17 Last Updated:2001/04/06
17 Last Updated:2001/04/06
18
18
19 Version no:1.2
19 Version no:1.2
20
20
21 Description:
21 Description:
22
22
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 Python source code to HTML markup, rendering comments, keywords,
24 Python source code to HTML markup, rendering comments, keywords,
25 operators, numeric and string literals in different colors.
25 operators, numeric and string literals in different colors.
26
26
27 It shows how to use the built-in keyword, token and tokenize modules to
27 It shows how to use the built-in keyword, token and tokenize modules to
28 scan Python source code and re-emit it with no changes to its original
28 scan Python source code and re-emit it with no changes to its original
29 formatting (which is the hard part).
29 formatting (which is the hard part).
30 """
30 """
31
31
32 __all__ = ['ANSICodeColors','Parser']
32 __all__ = ['ANSICodeColors', 'Parser']
33
33
34 _scheme_default = 'Linux'
34 _scheme_default = 'Linux'
35
35
36
36
37 # Imports
37 # Imports
38 import keyword
38 import keyword
39 import os
39 import os
40 import sys
40 import sys
41 import token
41 import token
42 import tokenize
42 import tokenize
43
43
44 generate_tokens = tokenize.generate_tokens
44 generate_tokens = tokenize.generate_tokens
45
45
46 from IPython.utils.coloransi import TermColors, InputTermColors ,ColorScheme, ColorSchemeTable
46 from IPython.utils.coloransi import TermColors, InputTermColors,ColorScheme, ColorSchemeTable
47 from .colorable import Colorable
47 from .colorable import Colorable
48 from io import StringIO
48 from io import StringIO
49
49
50 #############################################################################
50 #############################################################################
51 ### Python Source Parser (does Highlighting)
51 ### Python Source Parser (does Highlighting)
52 #############################################################################
52 #############################################################################
53
53
54 _KEYWORD = token.NT_OFFSET + 1
54 _KEYWORD = token.NT_OFFSET + 1
55 _TEXT = token.NT_OFFSET + 2
55 _TEXT = token.NT_OFFSET + 2
56
56
57 #****************************************************************************
57 #****************************************************************************
58 # Builtin color schemes
58 # Builtin color schemes
59
59
60 Colors = TermColors # just a shorthand
60 Colors = TermColors # just a shorthand
61
61
62 # Build a few color schemes
62 # Build a few color schemes
63 NoColor = ColorScheme(
63 NoColor = ColorScheme(
64 'NoColor',{
64 'NoColor',{
65 'header' : Colors.NoColor,
65 'header' : Colors.NoColor,
66 token.NUMBER : Colors.NoColor,
66 token.NUMBER : Colors.NoColor,
67 token.OP : Colors.NoColor,
67 token.OP : Colors.NoColor,
68 token.STRING : Colors.NoColor,
68 token.STRING : Colors.NoColor,
69 tokenize.COMMENT : Colors.NoColor,
69 tokenize.COMMENT : Colors.NoColor,
70 token.NAME : Colors.NoColor,
70 token.NAME : Colors.NoColor,
71 token.ERRORTOKEN : Colors.NoColor,
71 token.ERRORTOKEN : Colors.NoColor,
72
72
73 _KEYWORD : Colors.NoColor,
73 _KEYWORD : Colors.NoColor,
74 _TEXT : Colors.NoColor,
74 _TEXT : Colors.NoColor,
75
75
76 'in_prompt' : InputTermColors.NoColor, # Input prompt
76 'in_prompt' : InputTermColors.NoColor, # Input prompt
77 'in_number' : InputTermColors.NoColor, # Input prompt number
77 'in_number' : InputTermColors.NoColor, # Input prompt number
78 'in_prompt2' : InputTermColors.NoColor, # Continuation prompt
78 'in_prompt2' : InputTermColors.NoColor, # Continuation prompt
79 'in_normal' : InputTermColors.NoColor, # color off (usu. Colors.Normal)
79 'in_normal' : InputTermColors.NoColor, # color off (usu. Colors.Normal)
80
80
81 'out_prompt' : Colors.NoColor, # Output prompt
81 'out_prompt' : Colors.NoColor, # Output prompt
82 'out_number' : Colors.NoColor, # Output prompt number
82 'out_number' : Colors.NoColor, # Output prompt number
83
83
84 'normal' : Colors.NoColor # color off (usu. Colors.Normal)
84 'normal' : Colors.NoColor # color off (usu. Colors.Normal)
85 } )
85 } )
86
86
87 LinuxColors = ColorScheme(
87 LinuxColors = ColorScheme(
88 'Linux',{
88 'Linux',{
89 'header' : Colors.LightRed,
89 'header' : Colors.LightRed,
90 token.NUMBER : Colors.LightCyan,
90 token.NUMBER : Colors.LightCyan,
91 token.OP : Colors.Yellow,
91 token.OP : Colors.Yellow,
92 token.STRING : Colors.LightBlue,
92 token.STRING : Colors.LightBlue,
93 tokenize.COMMENT : Colors.LightRed,
93 tokenize.COMMENT : Colors.LightRed,
94 token.NAME : Colors.Normal,
94 token.NAME : Colors.Normal,
95 token.ERRORTOKEN : Colors.Red,
95 token.ERRORTOKEN : Colors.Red,
96
96
97 _KEYWORD : Colors.LightGreen,
97 _KEYWORD : Colors.LightGreen,
98 _TEXT : Colors.Yellow,
98 _TEXT : Colors.Yellow,
99
99
100 'in_prompt' : InputTermColors.Green,
100 'in_prompt' : InputTermColors.Green,
101 'in_number' : InputTermColors.LightGreen,
101 'in_number' : InputTermColors.LightGreen,
102 'in_prompt2' : InputTermColors.Green,
102 'in_prompt2' : InputTermColors.Green,
103 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
103 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
104
104
105 'out_prompt' : Colors.Red,
105 'out_prompt' : Colors.Red,
106 'out_number' : Colors.LightRed,
106 'out_number' : Colors.LightRed,
107
107
108 'normal' : Colors.Normal # color off (usu. Colors.Normal)
108 'normal' : Colors.Normal # color off (usu. Colors.Normal)
109 } )
109 } )
110
110
111 NeutralColors = ColorScheme(
111 NeutralColors = ColorScheme(
112 'Neutral',{
112 'Neutral',{
113 'header' : Colors.Red,
113 'header' : Colors.Red,
114 token.NUMBER : Colors.Cyan,
114 token.NUMBER : Colors.Cyan,
115 token.OP : Colors.Blue,
115 token.OP : Colors.Blue,
116 token.STRING : Colors.Blue,
116 token.STRING : Colors.Blue,
117 tokenize.COMMENT : Colors.Red,
117 tokenize.COMMENT : Colors.Red,
118 token.NAME : Colors.Normal,
118 token.NAME : Colors.Normal,
119 token.ERRORTOKEN : Colors.Red,
119 token.ERRORTOKEN : Colors.Red,
120
120
121 _KEYWORD : Colors.Green,
121 _KEYWORD : Colors.Green,
122 _TEXT : Colors.Blue,
122 _TEXT : Colors.Blue,
123
123
124 'in_prompt' : InputTermColors.Blue,
124 'in_prompt' : InputTermColors.Blue,
125 'in_number' : InputTermColors.LightBlue,
125 'in_number' : InputTermColors.LightBlue,
126 'in_prompt2' : InputTermColors.Blue,
126 'in_prompt2' : InputTermColors.Blue,
127 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
127 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
128
128
129 'out_prompt' : Colors.Red,
129 'out_prompt' : Colors.Red,
130 'out_number' : Colors.LightRed,
130 'out_number' : Colors.LightRed,
131
131
132 'normal' : Colors.Normal # color off (usu. Colors.Normal)
132 'normal' : Colors.Normal # color off (usu. Colors.Normal)
133 } )
133 } )
134
134
135 # Hack: the 'neutral' colours are not very visible on a dark background on
135 # Hack: the 'neutral' colours are not very visible on a dark background on
136 # Windows. Since Windows command prompts have a dark background by default, and
136 # Windows. Since Windows command prompts have a dark background by default, and
137 # relatively few users are likely to alter that, we will use the 'Linux' colours,
137 # relatively few users are likely to alter that, we will use the 'Linux' colours,
138 # designed for a dark background, as the default on Windows. Changing it here
138 # designed for a dark background, as the default on Windows. Changing it here
139 # avoids affecting the prompt colours rendered by prompt_toolkit, where the
139 # avoids affecting the prompt colours rendered by prompt_toolkit, where the
140 # neutral defaults do work OK.
140 # neutral defaults do work OK.
141
141
142 if os.name == 'nt':
142 if os.name == 'nt':
143 NeutralColors = LinuxColors.copy(name='Neutral')
143 NeutralColors = LinuxColors.copy(name='Neutral')
144
144
145 LightBGColors = ColorScheme(
145 LightBGColors = ColorScheme(
146 'LightBG',{
146 'LightBG',{
147 'header' : Colors.Red,
147 'header' : Colors.Red,
148 token.NUMBER : Colors.Cyan,
148 token.NUMBER : Colors.Cyan,
149 token.OP : Colors.Blue,
149 token.OP : Colors.Blue,
150 token.STRING : Colors.Blue,
150 token.STRING : Colors.Blue,
151 tokenize.COMMENT : Colors.Red,
151 tokenize.COMMENT : Colors.Red,
152 token.NAME : Colors.Normal,
152 token.NAME : Colors.Normal,
153 token.ERRORTOKEN : Colors.Red,
153 token.ERRORTOKEN : Colors.Red,
154
154
155
155
156 _KEYWORD : Colors.Green,
156 _KEYWORD : Colors.Green,
157 _TEXT : Colors.Blue,
157 _TEXT : Colors.Blue,
158
158
159 'in_prompt' : InputTermColors.Blue,
159 'in_prompt' : InputTermColors.Blue,
160 'in_number' : InputTermColors.LightBlue,
160 'in_number' : InputTermColors.LightBlue,
161 'in_prompt2' : InputTermColors.Blue,
161 'in_prompt2' : InputTermColors.Blue,
162 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
162 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
163
163
164 'out_prompt' : Colors.Red,
164 'out_prompt' : Colors.Red,
165 'out_number' : Colors.LightRed,
165 'out_number' : Colors.LightRed,
166
166
167 'normal' : Colors.Normal # color off (usu. Colors.Normal)
167 'normal' : Colors.Normal # color off (usu. Colors.Normal)
168 } )
168 } )
169
169
170 # Build table of color schemes (needed by the parser)
170 # Build table of color schemes (needed by the parser)
171 ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors, NeutralColors],
171 ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors, NeutralColors],
172 _scheme_default)
172 _scheme_default)
173
173
174 Undefined = object()
174 Undefined = object()
175
175
176 class Parser(Colorable):
176 class Parser(Colorable):
177 """ Format colored Python source.
177 """ Format colored Python source.
178 """
178 """
179
179
180 def __init__(self, color_table=None, out = sys.stdout, parent=None, style=None):
180 def __init__(self, color_table=None, out = sys.stdout, parent=None, style=None):
181 """ Create a parser with a specified color table and output channel.
181 """ Create a parser with a specified color table and output channel.
182
182
183 Call format() to process code.
183 Call format() to process code.
184 """
184 """
185
185
186 super(Parser, self).__init__(parent=parent)
186 super(Parser, self).__init__(parent=parent)
187
187
188 self.color_table = color_table and color_table or ANSICodeColors
188 self.color_table = color_table if color_table else ANSICodeColors
189 self.out = out
189 self.out = out
190 self.pos = None
191 self.lines = None
192 self.raw = None
190 if not style:
193 if not style:
191 self.style = self.default_style
194 self.style = self.default_style
192 else:
195 else:
193 self.style = style
196 self.style = style
194
197
195
198
196 def format(self, raw, out=None, scheme=Undefined):
199 def format(self, raw, out=None, scheme=Undefined):
197 import warnings
200 import warnings
198 if scheme is not Undefined:
201 if scheme is not Undefined:
199 warnings.warn('The `scheme` argument of IPython.utils.PyColorize:Parser.format is deprecated since IPython 6.0.'
202 warnings.warn('The `scheme` argument of IPython.utils.PyColorize:Parser.format is deprecated since IPython 6.0.'
200 'It will have no effect. Set the parser `style` directly.',
203 'It will have no effect. Set the parser `style` directly.',
201 stacklevel=2)
204 stacklevel=2)
202 return self.format2(raw, out)[0]
205 return self.format2(raw, out)[0]
203
206
204 def format2(self, raw, out = None):
207 def format2(self, raw, out = None):
205 """ Parse and send the colored source.
208 """ Parse and send the colored source.
206
209
207 If out and scheme are not specified, the defaults (given to
210 If out and scheme are not specified, the defaults (given to
208 constructor) are used.
211 constructor) are used.
209
212
210 out should be a file-type object. Optionally, out can be given as the
213 out should be a file-type object. Optionally, out can be given as the
211 string 'str' and the parser will automatically return the output in a
214 string 'str' and the parser will automatically return the output in a
212 string."""
215 string."""
213
216
214 string_output = 0
217 string_output = 0
215 if out == 'str' or self.out == 'str' or \
218 if out == 'str' or self.out == 'str' or \
216 isinstance(self.out, StringIO):
219 isinstance(self.out, StringIO):
217 # XXX - I don't really like this state handling logic, but at this
220 # XXX - I don't really like this state handling logic, but at this
218 # point I don't want to make major changes, so adding the
221 # point I don't want to make major changes, so adding the
219 # isinstance() check is the simplest I can do to ensure correct
222 # isinstance() check is the simplest I can do to ensure correct
220 # behavior.
223 # behavior.
221 out_old = self.out
224 out_old = self.out
222 self.out = StringIO()
225 self.out = StringIO()
223 string_output = 1
226 string_output = 1
224 elif out is not None:
227 elif out is not None:
225 self.out = out
228 self.out = out
226 else:
229 else:
227 raise ValueError('`out` or `self.out` should be file-like or the value `"str"`')
230 raise ValueError('`out` or `self.out` should be file-like or the value `"str"`')
228
231
229 # Fast return of the unmodified input for NoColor scheme
232 # Fast return of the unmodified input for NoColor scheme
230 if self.style == 'NoColor':
233 if self.style == 'NoColor':
231 error = False
234 error = False
232 self.out.write(raw)
235 self.out.write(raw)
233 if string_output:
236 if string_output:
234 return raw,error
237 return raw, error
235 else:
238 return None, error
236 return None,error
237
239
238 # local shorthands
240 # local shorthands
239 colors = self.color_table[self.style].colors
241 colors = self.color_table[self.style].colors
240 self.colors = colors # put in object so __call__ sees it
242 self.colors = colors # put in object so __call__ sees it
241
243
242 # Remove trailing whitespace and normalize tabs
244 # Remove trailing whitespace and normalize tabs
243 self.raw = raw.expandtabs().rstrip()
245 self.raw = raw.expandtabs().rstrip()
244
246
245 # store line offsets in self.lines
247 # store line offsets in self.lines
246 self.lines = [0, 0]
248 self.lines = [0, 0]
247 pos = 0
249 pos = 0
248 raw_find = self.raw.find
250 raw_find = self.raw.find
249 lines_append = self.lines.append
251 lines_append = self.lines.append
250 while 1:
252 while True:
251 pos = raw_find('\n', pos) + 1
253 pos = raw_find('\n', pos) + 1
252 if not pos: break
254 if not pos:
255 break
253 lines_append(pos)
256 lines_append(pos)
254 lines_append(len(self.raw))
257 lines_append(len(self.raw))
255
258
256 # parse the source and write it
259 # parse the source and write it
257 self.pos = 0
260 self.pos = 0
258 text = StringIO(self.raw)
261 text = StringIO(self.raw)
259
262
260 error = False
263 error = False
261 try:
264 try:
262 for atoken in generate_tokens(text.readline):
265 for atoken in generate_tokens(text.readline):
263 self(*atoken)
266 self(*atoken)
264 except tokenize.TokenError as ex:
267 except tokenize.TokenError as ex:
265 msg = ex.args[0]
268 msg = ex.args[0]
266 line = ex.args[1][0]
269 line = ex.args[1][0]
267 self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
270 self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
268 (colors[token.ERRORTOKEN],
271 (colors[token.ERRORTOKEN],
269 msg, self.raw[self.lines[line]:],
272 msg, self.raw[self.lines[line]:],
270 colors.normal)
273 colors.normal)
271 )
274 )
272 error = True
275 error = True
273 self.out.write(colors.normal+'\n')
276 self.out.write(colors.normal+'\n')
274 if string_output:
277 if string_output:
275 output = self.out.getvalue()
278 output = self.out.getvalue()
276 self.out = out_old
279 self.out = out_old
277 return (output, error)
280 return (output, error)
278 return (None, error)
281 return (None, error)
279
282
280 def _inner_call_(self, toktype, toktext, start_pos, end_pos, line):
283
284 def _inner_call_(self, toktype, toktext, start_pos):
281 """like call but write to a temporary buffer"""
285 """like call but write to a temporary buffer"""
282 buff = StringIO()
286 buff = StringIO()
283 (srow,scol) = start_pos
287 srow, scol = start_pos
284 (erow,ecol) = end_pos
285 colors = self.colors
288 colors = self.colors
286 owrite = buff.write
289 owrite = buff.write
287
290
288 # line separator, so this works across platforms
291 # line separator, so this works across platforms
289 linesep = os.linesep
292 linesep = os.linesep
290
293
291 # calculate new positions
294 # calculate new positions
292 oldpos = self.pos
295 oldpos = self.pos
293 newpos = self.lines[srow] + scol
296 newpos = self.lines[srow] + scol
294 self.pos = newpos + len(toktext)
297 self.pos = newpos + len(toktext)
295
298
296 # send the original whitespace, if needed
299 # send the original whitespace, if needed
297 if newpos > oldpos:
300 if newpos > oldpos:
298 owrite(self.raw[oldpos:newpos])
301 owrite(self.raw[oldpos:newpos])
299
302
300 # skip indenting tokens
303 # skip indenting tokens
301 if toktype in [token.INDENT, token.DEDENT]:
304 if toktype in [token.INDENT, token.DEDENT]:
302 self.pos = newpos
305 self.pos = newpos
303 buff.seek(0)
306 buff.seek(0)
304 return buff.read()
307 return buff.read()
305
308
306 # map token type to a color group
309 # map token type to a color group
307 if token.LPAR <= toktype <= token.OP:
310 if token.LPAR <= toktype <= token.OP:
308 toktype = token.OP
311 toktype = token.OP
309 elif toktype == token.NAME and keyword.iskeyword(toktext):
312 elif toktype == token.NAME and keyword.iskeyword(toktext):
310 toktype = _KEYWORD
313 toktype = _KEYWORD
311 color = colors.get(toktype, colors[_TEXT])
314 color = colors.get(toktype, colors[_TEXT])
312
315
313 #print '<%s>' % toktext, # dbg
314
315 # Triple quoted strings must be handled carefully so that backtracking
316 # Triple quoted strings must be handled carefully so that backtracking
316 # in pagers works correctly. We need color terminators on _each_ line.
317 # in pagers works correctly. We need color terminators on _each_ line.
317 if linesep in toktext:
318 if linesep in toktext:
318 toktext = toktext.replace(linesep, '%s%s%s' %
319 toktext = toktext.replace(linesep, '%s%s%s' %
319 (colors.normal,linesep,color))
320 (colors.normal,linesep,color))
320
321
321 # send text
322 # send text
322 owrite('%s%s%s' % (color,toktext,colors.normal))
323 owrite('%s%s%s' % (color,toktext,colors.normal))
323 buff.seek(0)
324 buff.seek(0)
324 return buff.read()
325 return buff.read()
325
326
326
327
327 def __call__(self, toktype, toktext, start_pos, end_pos, line):
328 def __call__(self, toktype, toktext, start_pos, end_pos, line):
328 """ Token handler, with syntax highlighting."""
329 """ Token handler, with syntax highlighting."""
329 self.out.write(
330 self.out.write(
330 self._inner_call_(toktype, toktext, start_pos, end_pos, line))
331 self._inner_call_(toktype, toktext, start_pos))
331
General Comments 0
You need to be logged in to leave comments. Login now