##// END OF EJS Templates
Some cleanup of PyColorize.
Matthias Bussonnier -
Show More
@@ -1,331 +1,331 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Class and program to colorize python source code for ANSI terminals.
4 4
5 5 Based on an HTML code highlighter by Jurgen Hermann found at:
6 6 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52298
7 7
8 8 Modifications by Fernando Perez (fperez@colorado.edu).
9 9
10 10 Information on the original HTML highlighter follows:
11 11
12 12 MoinMoin - Python Source Parser
13 13
14 14 Title: Colorize Python source using the built-in tokenizer
15 15
16 16 Submitter: Jurgen Hermann
17 17 Last Updated:2001/04/06
18 18
19 19 Version no:1.2
20 20
21 21 Description:
22 22
23 23 This code is part of MoinMoin (http://moin.sourceforge.net/) and converts
24 24 Python source code to HTML markup, rendering comments, keywords,
25 25 operators, numeric and string literals in different colors.
26 26
27 27 It shows how to use the built-in keyword, token and tokenize modules to
28 28 scan Python source code and re-emit it with no changes to its original
29 29 formatting (which is the hard part).
30 30 """
31 31
32 32 __all__ = ['ANSICodeColors','Parser']
33 33
34 34 _scheme_default = 'Linux'
35 35
36 36
37 37 # Imports
38 38 import keyword
39 39 import os
40 40 import sys
41 41 import token
42 42 import tokenize
43 43
44 44 generate_tokens = tokenize.generate_tokens
45 45
46 46 from IPython.utils.coloransi import TermColors, InputTermColors ,ColorScheme, ColorSchemeTable
47 47 from .colorable import Colorable
48 48 from io import StringIO
49 49
50 50 #############################################################################
51 51 ### Python Source Parser (does Highlighting)
52 52 #############################################################################
53 53
54 54 _KEYWORD = token.NT_OFFSET + 1
55 55 _TEXT = token.NT_OFFSET + 2
56 56
57 57 #****************************************************************************
58 58 # Builtin color schemes
59 59
60 60 Colors = TermColors # just a shorthand
61 61
62 62 # Build a few color schemes
63 63 NoColor = ColorScheme(
64 64 'NoColor',{
65 65 'header' : Colors.NoColor,
66 66 token.NUMBER : Colors.NoColor,
67 67 token.OP : Colors.NoColor,
68 68 token.STRING : Colors.NoColor,
69 69 tokenize.COMMENT : Colors.NoColor,
70 70 token.NAME : Colors.NoColor,
71 71 token.ERRORTOKEN : Colors.NoColor,
72 72
73 73 _KEYWORD : Colors.NoColor,
74 74 _TEXT : Colors.NoColor,
75 75
76 76 'in_prompt' : InputTermColors.NoColor, # Input prompt
77 77 'in_number' : InputTermColors.NoColor, # Input prompt number
78 78 'in_prompt2' : InputTermColors.NoColor, # Continuation prompt
79 79 'in_normal' : InputTermColors.NoColor, # color off (usu. Colors.Normal)
80 80
81 81 'out_prompt' : Colors.NoColor, # Output prompt
82 82 'out_number' : Colors.NoColor, # Output prompt number
83 83
84 84 'normal' : Colors.NoColor # color off (usu. Colors.Normal)
85 85 } )
86 86
87 87 LinuxColors = ColorScheme(
88 88 'Linux',{
89 89 'header' : Colors.LightRed,
90 90 token.NUMBER : Colors.LightCyan,
91 91 token.OP : Colors.Yellow,
92 92 token.STRING : Colors.LightBlue,
93 93 tokenize.COMMENT : Colors.LightRed,
94 94 token.NAME : Colors.Normal,
95 95 token.ERRORTOKEN : Colors.Red,
96 96
97 97 _KEYWORD : Colors.LightGreen,
98 98 _TEXT : Colors.Yellow,
99 99
100 100 'in_prompt' : InputTermColors.Green,
101 101 'in_number' : InputTermColors.LightGreen,
102 102 'in_prompt2' : InputTermColors.Green,
103 103 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
104 104
105 105 'out_prompt' : Colors.Red,
106 106 'out_number' : Colors.LightRed,
107 107
108 108 'normal' : Colors.Normal # color off (usu. Colors.Normal)
109 109 } )
110 110
111 111 NeutralColors = ColorScheme(
112 112 'Neutral',{
113 113 'header' : Colors.Red,
114 114 token.NUMBER : Colors.Cyan,
115 115 token.OP : Colors.Blue,
116 116 token.STRING : Colors.Blue,
117 117 tokenize.COMMENT : Colors.Red,
118 118 token.NAME : Colors.Normal,
119 119 token.ERRORTOKEN : Colors.Red,
120 120
121 121 _KEYWORD : Colors.Green,
122 122 _TEXT : Colors.Blue,
123 123
124 124 'in_prompt' : InputTermColors.Blue,
125 125 'in_number' : InputTermColors.LightBlue,
126 126 'in_prompt2' : InputTermColors.Blue,
127 127 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
128 128
129 129 'out_prompt' : Colors.Red,
130 130 'out_number' : Colors.LightRed,
131 131
132 132 'normal' : Colors.Normal # color off (usu. Colors.Normal)
133 133 } )
134 134
135 135 # Hack: the 'neutral' colours are not very visible on a dark background on
136 136 # Windows. Since Windows command prompts have a dark background by default, and
137 137 # relatively few users are likely to alter that, we will use the 'Linux' colours,
138 138 # designed for a dark background, as the default on Windows. Changing it here
139 139 # avoids affecting the prompt colours rendered by prompt_toolkit, where the
140 140 # neutral defaults do work OK.
141 141
142 142 if os.name == 'nt':
143 143 NeutralColors = LinuxColors.copy(name='Neutral')
144 144
145 145 LightBGColors = ColorScheme(
146 146 'LightBG',{
147 147 'header' : Colors.Red,
148 148 token.NUMBER : Colors.Cyan,
149 149 token.OP : Colors.Blue,
150 150 token.STRING : Colors.Blue,
151 151 tokenize.COMMENT : Colors.Red,
152 152 token.NAME : Colors.Normal,
153 153 token.ERRORTOKEN : Colors.Red,
154 154
155 155
156 156 _KEYWORD : Colors.Green,
157 157 _TEXT : Colors.Blue,
158 158
159 159 'in_prompt' : InputTermColors.Blue,
160 160 'in_number' : InputTermColors.LightBlue,
161 161 'in_prompt2' : InputTermColors.Blue,
162 162 'in_normal' : InputTermColors.Normal, # color off (usu. Colors.Normal)
163 163
164 164 'out_prompt' : Colors.Red,
165 165 'out_number' : Colors.LightRed,
166 166
167 167 'normal' : Colors.Normal # color off (usu. Colors.Normal)
168 168 } )
169 169
170 170 # Build table of color schemes (needed by the parser)
171 171 ANSICodeColors = ColorSchemeTable([NoColor,LinuxColors,LightBGColors, NeutralColors],
172 172 _scheme_default)
173 173
174 174 Undefined = object()
175 175
176 176 class Parser(Colorable):
177 177 """ Format colored Python source.
178 178 """
179 179
180 180 def __init__(self, color_table=None, out = sys.stdout, parent=None, style=None):
181 181 """ Create a parser with a specified color table and output channel.
182 182
183 183 Call format() to process code.
184 184 """
185 185
186 186 super(Parser, self).__init__(parent=parent)
187 187
188 self.color_table = color_table and color_table or ANSICodeColors
188 self.color_table = color_table if color_table else ANSICodeColors
189 189 self.out = out
190 self.pos = None
191 self.lines = None
192 self.raw = None
190 193 if not style:
191 194 self.style = self.default_style
192 195 else:
193 196 self.style = style
194 197
195 198
196 199 def format(self, raw, out=None, scheme=Undefined):
197 200 import warnings
198 201 if scheme is not Undefined:
199 202 warnings.warn('The `scheme` argument of IPython.utils.PyColorize:Parser.format is deprecated since IPython 6.0.'
200 203 'It will have no effect. Set the parser `style` directly.',
201 204 stacklevel=2)
202 205 return self.format2(raw, out)[0]
203 206
204 207 def format2(self, raw, out = None):
205 208 """ Parse and send the colored source.
206 209
207 210 If out is not specified, the parser's own `out` (given to the constructor)
208 211 must be the string 'str' or a StringIO; otherwise a ValueError is raised.
209 212
210 213 out should be a file-type object. Optionally, out can be given as the
211 214 string 'str' and the parser will automatically return the output in a
212 215 string."""
213 216
214 217 string_output = 0
215 218 if out == 'str' or self.out == 'str' or \
216 219 isinstance(self.out, StringIO):
217 220 # XXX - I don't really like this state handling logic, but at this
218 221 # point I don't want to make major changes, so adding the
219 222 # isinstance() check is the simplest I can do to ensure correct
220 223 # behavior.
221 224 out_old = self.out
222 225 self.out = StringIO()
223 226 string_output = 1
224 227 elif out is not None:
225 228 self.out = out
226 229 else:
227 230 raise ValueError('`out` or `self.out` should be file-like or the value `"str"`')
228 231
229 232 # Fast return of the unmodified input for NoColor scheme
230 233 if self.style == 'NoColor':
231 234 error = False
232 235 self.out.write(raw)
233 236 if string_output:
234 237 return raw,error
235 else:
236 238 return None,error
237 239
238 240 # local shorthands
239 241 colors = self.color_table[self.style].colors
240 242 self.colors = colors # put in object so __call__ sees it
241 243
242 244 # Remove trailing whitespace and normalize tabs
243 245 self.raw = raw.expandtabs().rstrip()
244 246
245 247 # store line offsets in self.lines
246 248 self.lines = [0, 0]
247 249 pos = 0
248 250 raw_find = self.raw.find
249 251 lines_append = self.lines.append
250 while 1:
252 while True:
251 253 pos = raw_find('\n', pos) + 1
252 if not pos: break
254 if not pos:
255 break
253 256 lines_append(pos)
254 257 lines_append(len(self.raw))
255 258
256 259 # parse the source and write it
257 260 self.pos = 0
258 261 text = StringIO(self.raw)
259 262
260 263 error = False
261 264 try:
262 265 for atoken in generate_tokens(text.readline):
263 266 self(*atoken)
264 267 except tokenize.TokenError as ex:
265 268 msg = ex.args[0]
266 269 line = ex.args[1][0]
267 270 self.out.write("%s\n\n*** ERROR: %s%s%s\n" %
268 271 (colors[token.ERRORTOKEN],
269 272 msg, self.raw[self.lines[line]:],
270 273 colors.normal)
271 274 )
272 275 error = True
273 276 self.out.write(colors.normal+'\n')
274 277 if string_output:
275 278 output = self.out.getvalue()
276 279 self.out = out_old
277 280 return (output, error)
278 281 return (None, error)
279 282
280 def _inner_call_(self, toktype, toktext, start_pos, end_pos, line):
283
284 def _inner_call_(self, toktype, toktext, start_pos):
281 285 """Like __call__, but return the colored text instead of writing it to self.out."""
282 286 buff = StringIO()
283 (srow,scol) = start_pos
284 (erow,ecol) = end_pos
287 srow, scol = start_pos
285 288 colors = self.colors
286 289 owrite = buff.write
287 290
288 291 # line separator, so this works across platforms
289 292 linesep = os.linesep
290 293
291 294 # calculate new positions
292 295 oldpos = self.pos
293 296 newpos = self.lines[srow] + scol
294 297 self.pos = newpos + len(toktext)
295 298
296 299 # send the original whitespace, if needed
297 300 if newpos > oldpos:
298 301 owrite(self.raw[oldpos:newpos])
299 302
300 303 # skip indenting tokens
301 304 if toktype in [token.INDENT, token.DEDENT]:
302 305 self.pos = newpos
303 306 buff.seek(0)
304 307 return buff.read()
305 308
306 309 # map token type to a color group
307 310 if token.LPAR <= toktype <= token.OP:
308 311 toktype = token.OP
309 312 elif toktype == token.NAME and keyword.iskeyword(toktext):
310 313 toktype = _KEYWORD
311 314 color = colors.get(toktype, colors[_TEXT])
312 315
313 #print '<%s>' % toktext, # dbg
314
315 316 # Triple quoted strings must be handled carefully so that backtracking
316 317 # in pagers works correctly. We need color terminators on _each_ line.
317 318 if linesep in toktext:
318 319 toktext = toktext.replace(linesep, '%s%s%s' %
319 320 (colors.normal,linesep,color))
320 321
321 322 # send text
322 323 owrite('%s%s%s' % (color,toktext,colors.normal))
323 324 buff.seek(0)
324 325 return buff.read()
325 326
326 327
327 328 def __call__(self, toktype, toktext, start_pos, end_pos, line):
328 329 """ Token handler, with syntax highlighting."""
329 330 self.out.write(
330 self._inner_call_(toktype, toktext, start_pos, end_pos, line))
331
331 self._inner_call_(toktype, toktext, start_pos))
General Comments 0
You need to be logged in to leave comments. Login now