Slight name change for aliases....
chebee7i

@@ -1,501 +1,501 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer
    IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer
    IPythonTracebackLexer
        Supports 2.x and 3.x via the keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and, from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
    (r'(\%+)(\w+)\s+(\.*)(\n)', bygroups(Operator, Keyword,
                                         using(BashLexer), Text)),
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
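
# Illustrative notes on the rules above (added comments, not in the original
# source). As written, `(\.*)` in the first rule matches only a run of
# literal dots, so most magics with arguments fall through to the second
# rule, which highlights just the magic name:
#
#     %lsmagic      -> Operator('%'), Keyword('lsmagic')          (rule 2)
#     !ls -la       -> Operator('!'), then 'ls -la' via BashLexer (rule 3)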

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
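
# A minimal usage sketch (added; not part of the original module). The
# generated classes behave like any other Pygments lexer:
#
#     >>> from pygments import highlight
#     >>> from pygments.formatters import HtmlFormatter
#     >>> html = highlight(u"%timeit x = 2 ** 10", IPythonLexer(),
#     ...                  HtmlFormatter())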


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # just as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
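
# Illustration (added comment): for a runtime traceback such as
#
#     ---------------------------------------------------------------------------
#     ZeroDivisionError                         Traceback (most recent call last)
#     <ipython-input-1-...> in <module>()
#     ----> 1 1/0
#
#     ZeroDivisionError: integer division or modulo by zero
#
# the rules above mark the hyphen line as Generic.Traceback, the exception
# names as Name.Exception, and the source on the arrowed line as Other,
# which IPythonTracebackLexer below delegates to an IPython lexer.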


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
-            self.aliases = ['ipythontb3']
+            self.aliases = ['ipython3tb']
        else:
-            self.aliases = ['ipythontb2', 'ipythontb']
+            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)
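
# Usage sketch (added): the `python3` option handled above also switches
# the registered aliases:
#
#     >>> IPythonTracebackLexer(python3=True).aliases
#     ['ipython3tb']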

@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

-        .. code-block:: ipythoncon
+        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

-        .. code-block:: ipythoncon
+        .. code-block:: ipythonconsole

            In [1]: raise Exception
            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
-    aliases = ['ipythoncon']
+    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
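
    # Added note: `ipytb_start` recognizes either of the two traceback
    # openers, e.g.
    #
    #     --------------------------------------------------------  (runtime errors)
    #       File "<ipython-input-1-...>", line 1                    (syntax errors)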

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
-            self.aliases = ['ipythoncon3']
+            self.aliases = ['ipython3console']
        else:
-            self.aliases = ['ipythoncon2', 'ipythoncon']
+            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()
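
    # Hedged example (added): the prompt regexes are passed as strings and
    # compiled above, so a console lexer with explicit prompts could be
    # built with, e.g.
    #
    #     >>> lexer = IPythonConsoleLexer(in1_regex=r'In \[[0-9]+\]: ')
    #     >>> lexer.aliases
    #     ['ipython2console', 'ipythonconsole']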

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
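        # Added illustration: for the line "In [1]: a = 'foo'\n", this
        # method returns
        #
        #     ('input', u"a = 'foo'\n", (0, Generic.Prompt, u'In [1]: '))
        #
        # i.e. the prompt becomes an insertion and the remainder is buffered
        # as Python input.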
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non-stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #     In [1]: print 3
            #     3
            #
            # But the following second line is part of the input:
            #
            #     In [2]: while True:
            #                 print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            for token in self.buffered_tokens():
                yield token
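
    # Added note: the `else` above belongs to the `for` loop (a for-else),
    # so the final buffer is flushed once after every line is consumed.
    # A quick sketch of inspecting the resulting token stream:
    #
    #     >>> text = u"In [1]: 1 + 1\nOut[1]: 2\n"
    #     >>> for idx, token, value in IPythonConsoleLexer().get_tokens_unprocessed(text):
    #     ...     print(idx, token, repr(value))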

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        if re.match(r'(In \[[0-9]+\]:)', text.strip()):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

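# End-to-end sketch (added; the module path in the entry point is an
# assumption for illustration). Since IPyLexer is not bundled with Pygments,
# the docstring above notes it must be registered explicitly, e.g. via a
# `pygments.lexers` entry point in a package's setup.py:
#
#     entry_points = {
#         'pygments.lexers': ['ipy = IPython.nbconvert.utils.lexers:IPyLexer'],
#     }
#
# after which `pygments.lexers.get_lexer_by_name('ipy')` returns an instance.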