Insert copyright again.
chebee7i
@@ -1,479 +1,486 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer
    IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer
    IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
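
# A minimal registration sketch (an assumption, not part of this module): in
# a Sphinx project, one way to expose IPyLexer to Pygments is from conf.py:
#
#     def setup(app):
#         from IPython.nbconvert.utils.lexers import IPyLexer
#         app.add_lexer('ipy', IPyLexer())
#
# The import path above is an assumption; it depends on where this module is
# installed, and registration details vary by Sphinx/Pygments version.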

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
    (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                        using(BashLexer), Text)),
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
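
# Illustrative inputs for the three rules above (a descriptive sketch, not
# part of the original source):
#
#     %cd /tmp       matched by the first rule: '%' is an Operator, 'cd' a
#                    Keyword, and the arguments are lexed by BashLexer
#     %automagic     matched by the second rule: a magic with no arguments
#     !ls -la        matched by the third rule: a full-line shell escape
#                    whose body is lexed by BashLexer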

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
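
# A short usage sketch (assumption: Pygments is installed; the formatter
# choice is arbitrary). The generated classes behave like any other
# Pygments lexer:
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     print(highlight(u"%timeit f(x)\n", IPython3Lexer(), HtmlFormatter()))
#
# Here '%' and 'timeit' are tagged by ipython_tokens, the arguments are
# handed to BashLexer, and ordinary Python lines fall through to the
# underlying Python 3 lexer.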


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
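
# For reference (a descriptive sketch, not from the original source), the
# rules above tag a non-syntax traceback roughly like this:
#
#     ---------------------------------------------------------  Generic.Traceback
#     ZeroDivisionError    Traceback (most recent call last)     Name.Exception + message
#     <ipython-input-1-...> in <module>()                        Namespace / Entity / Tag
#     ----> 1 1/0                                                emphasized line
#
#     ZeroDivisionError: integer division or modulo by zero      Name.Exception + Text
#
# The Python source on each numbered line is tagged Other and later
# delegated to an IPython lexer by IPythonTracebackLexer.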


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)
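
# Usage sketch (an assumption, not from the original source): the `python3`
# option picks which Python lexer receives the code delegated by the partial
# traceback lexer, e.g.:
#
#     lexer = IPythonTracebackLexer(python3=True)
#     tokens = list(lexer.get_tokens(traceback_text))
#
# where `traceback_text` is a placeholder for a captured traceback string.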

@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: raise Exception
            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythoncon']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output. The
    # input regex should be consistent with and also be the combination of
    # the values of the `in_template` and `in2_template`. For example, the
    # default prompts are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    # Note, we do not include the trailing whitespace in the regex since
    # we want to allow blank prompts (and editors often remove trailing
    # whitespace).
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression used to detect the start of inputs.
            Although the IPython configuration setting may have a trailing
            whitespace, do not include it in the regex. If `None`, then the
            default input prompt is assumed.
        in2_regex : str
            The regular expression used to detect the continuation of
            inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : str
            The regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts. For example, see copybutton.js.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            setattr(self, attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

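    # Configuration sketch (an assumption, not from the original source): a
    # project with customized prompts could pass matching regex strings as
    # options, mirroring its PromptManager templates:
    #
    #     lexer = IPythonConsoleLexer(in1_regex=r'py \[[0-9]+\]: ',
    #                                 in2_regex=r'   \.\.+\.: ',
    #                                 out_regex=r'out \[[0-9]+\]: ')
    #
    # The prompt strings above are hypothetical.
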
    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []
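
    # Note (descriptive, not from the original source): each entry of
    # `self.insertions` is a pair `(buffer_position, [(0, token, text)])`,
    # the format expected by pygments.lexer.do_insertions, which splices
    # the prompt tokens into the stream produced for the buffered code.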

    def get_modecode(self, line):
        """
        Returns the next mode and code to be added to the next mode's buffer.

        The next mode depends on the current mode and the contents of line.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        ### Check for possible end of input
        ###
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        ### Check for output prompt
        ###
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        ### Check for input or continuation prompt (non-stripped version)
        ###
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input, or continued input when not in tb mode.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        ### Check for input or continuation prompt (stripped version)
        ###
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input, or continued input when not in tb mode.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match_rstrip
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        ### Check for traceback
        ###
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        ### All other stuff...
        ###
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #                print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion
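
    # Worked example (a descriptive sketch, not from the original source).
    # Feeding these lines to get_modecode one at a time:
    #
    #     "In [1]: 1 + 1\n"  -> mode 'input',  code "1 + 1\n", prompt insertion
    #     "Out[1]: 2\n"      -> mode 'output', code "2\n",     heading insertion
    #     "some text\n"      -> mode 'output', code "some text\n", no insertion
    #
    # A line holding only a continuation prompt ("   ...:") ends the current
    # input block, which is how blank continuation lines return the lexer to
    # output mode.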

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_modecode(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            for token in self.buffered_tokens():
                yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        if re.match(r'(In \[[0-9]+\]:)', text.strip()):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

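
# A minimal end-to-end sketch (not part of the original module; the formatter
# choice and sample text are arbitrary). Running this file directly would
# highlight a small console session:
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    sample = u"In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
    # IPyLexer sees the leading "In [1]:" prompt and hands the whole text
    # to IPythonConsoleLexer; without the prompt it would use IPythonLexer.
    print(highlight(sample, IPyLexer(), TerminalFormatter()))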