##// END OF EJS Templates
Improve detection of IPython console sessions.
chebee7i -
Show More
@@ -1,500 +1,502 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Defines a variety of Pygments lexers for highlighting IPython code.
4 4
5 5 This includes:
6 6
7 7 IPythonLexer, IPython3Lexer
8 8 Lexers for pure IPython (python + magic/shell commands)
9 9
10 10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 12 lexer reads everything but the Python code appearing in a traceback.
13 13 The full lexer combines the partial lexer with an IPython lexer.
14 14
15 15 IPythonConsoleLexer
16 16 A lexer for IPython console sessions, with support for tracebacks.
17 17
18 18 IPyLexer
19 19 A friendly lexer which examines the first line of text and from it,
20 20 decides whether to use an IPython lexer or an IPython console lexer.
21 21 This is probably the only lexer that needs to be explicitly added
22 22 to Pygments.
23 23
24 24 """
25 25 #-----------------------------------------------------------------------------
26 26 # Copyright (c) 2013, the IPython Development Team.
27 27 #
28 28 # Distributed under the terms of the Modified BSD License.
29 29 #
30 30 # The full license is in the file COPYING.txt, distributed with this software.
31 31 #-----------------------------------------------------------------------------
32 32
33 33 # Standard library
34 34 import re
35 35
36 36 # Third party
37 37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
38 38 from pygments.lexer import (
39 39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
40 40 )
41 41 from pygments.token import (
42 42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
43 43 )
44 44 from pygments.util import get_bool_opt
45 45
46 46 # Local
47 47 from IPython.testing.skipdoctest import skip_doctest
48 48
# Matches a single line at a time, including its trailing newline.
line_re = re.compile('.*?\n')

# IPython-specific token rules, prepended to the 'root' state of the
# Python lexers constructed by `build_ipy_lexer`.
ipython_tokens = [
    # Magic command with arguments: "%magic args" -- arguments are lexed
    # as shell code.  Fix: the pattern previously used r'(\.*)', which
    # matches only literal dots, so real magic arguments never reached
    # the BashLexer; r'(.*)' captures them as intended.
    (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                        using(BashLexer), Text)),
    # Bare magic command with no arguments: "%magic".
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    # Shell escape at the start of a line: "!command" -- the command is
    # lexed as shell code.
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
57 57
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    Returns
    -------
    type
        A newly created lexer class deriving from the chosen Pygments
        Python lexer, with IPython token rules prepended.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    # (A dead `clsname` local that was assigned but never used has been
    # removed; `type()` below is deliberately called with `name`.)
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    # A shallow copy is sufficient: 'root' is rebound to a brand-new
    # list, so the base lexer's own token table is never mutated.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
98 98
99 99
class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    # NOTE: rule order matters -- earlier patterns win, and several of
    # these regexes overlap (e.g. the "regular line" and "emphasized
    # line" rules both match a line number).
    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            # (?u) enables Unicode matching for \w so non-ASCII
            # exception names are recognized.
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            # IPythonTracebackLexer delegates Other spans to an IPython
            # lexer, so unmatched lines end up highlighted as code.
            (r'.*\n', Other),
        ],
    }
147 147
148 148
class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception to the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # This is a DelegatingLexer: text is first run through the partial
    # IPython traceback lexer, and every span it tags as "Other" is then
    # handed to the "root" lexer -- an IPython lexer whose flavor is
    # chosen by the boolean `python3` option.
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        # Select aliases and the code lexer in a single branch.
        if self.python3:
            self.aliases = ['ipython3tb']
            root_lexer = IPython3Lexer
        else:
            self.aliases = ['ipython2tb', 'ipythontb']
            root_lexer = IPythonLexer

        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)
182 182
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    # These class attributes hold the *source* regex strings; __init__
    # compiles them (and rstrip'd variants) into instance attributes.
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    #: Mirrors the first two rules of IPythonPartialTracebackLexer.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        # Allow callers to override the prompt regexes via options.
        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason can't just use the rstrip'd variants instead is because
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as hide
        # the appearance of prompts, with the whitespace included. One example
        # use of this is in copybutton.js from the standard lib Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        # Each name in `attrs` is looked up in this frame's locals and
        # stored back on the instance as a compiled pattern.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        # The same traceback lexer class is used for both flavors; it
        # receives `python3` through **options and branches internally.
        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        # Lexing state: current mode ('input', 'output', or 'tb'),
        # absolute index of the buffer start, the accumulated raw text
        # for the current mode, and pending prompt insertions.
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        # Pick the lexer appropriate for the mode the buffer was
        # accumulated under.
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        # A bare continuation prompt (nothing after it) signals that the
        # user finished a multiline input.
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        """Lex `text` line by line, switching lexers at mode boundaries."""
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            # for/else: after the loop completes, flush whatever is
            # still buffered so trailing text is not dropped.
            for token in self.buffered_tokens():
                yield token
465 465
class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the text contains an IPython input
    prompt, ``In \[[0-9]+\]:``, anywhere in it, then the entire text is
    parsed with an IPython console lexer. If not, then the entire text is
    parsed with an IPython lexer. Searching the whole text (rather than
    only the first line) allows console sessions that begin with comments
    to be detected. The docstring is a raw string because of the ``\[``
    escapes.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        # NOTE(review): the pure-code path always uses the Python 2
        # flavored IPythonLexer, even when python3=True (the console
        # lexer does honor the option) -- confirm this is intended.
        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        # re.search is the idiomatic equivalent of the previous
        # re.match(r'.*(...)', text.strip(), re.DOTALL): it detects the
        # same prompts but avoids the greedy-'.*' backtracking scan
        # (and the needless strip, which cannot affect whether the
        # prompt occurs).
        if re.search(r'In \[[0-9]+\]:', text):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
500 502
General Comments 0
You need to be logged in to leave comments. Login now