#7548: A bang, "!", followed by an equal sign, "=", is not a shell escape.
Lev Abalkin
@@ -1,507 +1,507 @@
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Defines a variety of Pygments lexers for highlighting IPython code.
4 4
5 5 This includes:
6 6
7 7 IPythonLexer, IPython3Lexer
8 8 Lexers for pure IPython (python + magic/shell commands)
9 9
10 10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 12 lexer reads everything but the Python code appearing in a traceback.
13 13 The full lexer combines the partial lexer with an IPython lexer.
14 14
15 15 IPythonConsoleLexer
16 16 A lexer for IPython console sessions, with support for tracebacks.
17 17
18 18 IPyLexer
19 19 A friendly lexer which examines the first line of text and, from it,
20 20 decides whether to use an IPython lexer or an IPython console lexer.
21 21 This is probably the only lexer that needs to be explicitly added
22 22 to Pygments.
23 23
24 24 """
25 25 #-----------------------------------------------------------------------------
26 26 # Copyright (c) 2013, the IPython Development Team.
27 27 #
28 28 # Distributed under the terms of the Modified BSD License.
29 29 #
30 30 # The full license is in the file COPYING.txt, distributed with this software.
31 31 #-----------------------------------------------------------------------------
32 32
33 33 # Standard library
34 34 import re
35 35
36 36 # Third party
37 37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
38 38 from pygments.lexer import (
39 39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
40 40 )
41 41 from pygments.token import (
42 42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
43 43 )
44 44 from pygments.util import get_bool_opt
45 45
46 46 # Local
47 47 from IPython.testing.skipdoctest import skip_doctest
48 48
49 49 line_re = re.compile('.*?\n')
50 50
51 51 ipython_tokens = [
52 52 (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
53 53 (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
54 54 (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
55 55 (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
56 56 (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
57 57 using(BashLexer), Text)),
58 58 (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
59 59 (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
60    - (r'(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
   60 + (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
61 61 ]
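# A tiny self-check of the amended bang rule (an editorial sketch; it uses
# only names already defined above and runs silently). The lookahead must sit
# *after* the '!': a lookahead placed before it would be vacuous, since the
# next character is the '!' itself, never '='.
_bang = re.compile(r'(!)(?!=)(.+)(\n)')
assert _bang.match('!ls -la\n')    # a shell escape is still recognized
assert not _bang.match('!= b\n')   # '!=' is left for the Python lexer
del _bang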
62 62
63 63 def build_ipy_lexer(python3):
64 64 """Builds IPython lexers depending on the value of `python3`.
65 65
66 66 The lexer inherits from an appropriate Python lexer and then adds
67 67 information about IPython specific keywords (i.e. magic commands,
68 68 shell commands, etc.)
69 69
70 70 Parameters
71 71 ----------
72 72 python3 : bool
73 73 If `True`, then build an IPython lexer from a Python 3 lexer.
74 74
75 75 """
76 76 # It would be nice to have a single IPython lexer class which takes
77 77 # a boolean `python3`. But since there are two Python lexer classes,
78 78 # we will also have two IPython lexer classes.
79 79 if python3:
80 80 PyLexer = Python3Lexer
81 81 clsname = 'IPython3Lexer'
82 82 name = 'IPython3'
83 83 aliases = ['ipython3']
84 84 doc = """IPython3 Lexer"""
85 85 else:
86 86 PyLexer = PythonLexer
87 87 clsname = 'IPythonLexer'
88 88 name = 'IPython'
89 89 aliases = ['ipython2', 'ipython']
90 90 doc = """IPython Lexer"""
91 91
92 92 tokens = PyLexer.tokens.copy()
93 93 tokens['root'] = ipython_tokens + tokens['root']
94 94
95 95 attrs = {'name': name, 'aliases': aliases, 'filenames': [],
96 96 '__doc__': doc, 'tokens': tokens}
97 97
98 98 return type(name, (PyLexer,), attrs)
99 99
100 100
101 101 IPython3Lexer = build_ipy_lexer(python3=True)
102 102 IPythonLexer = build_ipy_lexer(python3=False)
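# Sanity sketch: the classes built by `build_ipy_lexer` are ordinary Pygments
# lexer subclasses, and the IPython-specific rules are consulted before the
# inherited Python rules because they are prepended to tokens['root'].
assert issubclass(IPython3Lexer, Python3Lexer)
assert issubclass(IPythonLexer, PythonLexer)
assert IPythonLexer.tokens['root'][:len(ipython_tokens)] == ipython_tokens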
103 103
104 104
105 105 class IPythonPartialTracebackLexer(RegexLexer):
106 106 """
107 107 Partial lexer for IPython tracebacks.
108 108
109 109 Handles all the non-python output. This works for both Python 2.x and 3.x.
110 110
111 111 """
112 112 name = 'IPython Partial Traceback'
113 113
114 114 tokens = {
115 115 'root': [
116 116 # Tracebacks for syntax errors have a different style.
117 117 # For both types of tracebacks, we mark the first line with
118 118 # Generic.Traceback. For syntax errors, we mark the filename
119 119 # as we mark the filenames for non-syntax tracebacks.
120 120 #
121 121 # These two regexps define how IPythonConsoleLexer finds a
122 122 # traceback.
123 123 #
124 124 ## Non-syntax traceback
125 125 (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
126 126 ## Syntax traceback
127 127 (r'^( File)(.*)(, line )(\d+\n)',
128 128 bygroups(Generic.Traceback, Name.Namespace,
129 129 Generic.Traceback, Literal.Number.Integer)),
130 130
131 131 # (Exception Identifier)(Whitespace)(Traceback Message)
132 132 (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
133 133 bygroups(Name.Exception, Generic.Whitespace, Text)),
134 134 # (Module/Filename)(Text)(Callee)(Function Signature)
135 135 # Better options for callee and function signature?
136 136 (r'(.*)( in )(.*)(\(.*\)\n)',
137 137 bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
138 138 # Regular line: (Whitespace)(Line Number)(Python Code)
139 139 (r'(\s*?)(\d+)(.*?\n)',
140 140 bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
141 141 # Emphasized line: (Arrow)(Line Number)(Python Code)
142 142 # Using Exception token so arrow color matches the Exception.
143 143 (r'(-*>?\s?)(\d+)(.*?\n)',
144 144 bygroups(Name.Exception, Literal.Number.Integer, Other)),
145 145 # (Exception Identifier)(Message)
146 146 (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
147 147 bygroups(Name.Exception, Text)),
148 148 # Tag everything else as Other, will be handled later.
149 149 (r'.*\n', Other),
150 150 ],
151 151 }
152 152
153 153
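# Sketch: anything the partial lexer does not claim is tagged Other, which is
# what lets the delegating lexer below hand those spans back to a Python
# lexer. For an arrowed frame line, the code portion survives as one Other
# token:
_toks = list(IPythonPartialTracebackLexer().get_tokens(u'----> 1 raise Exception\n'))
assert (Other, u' raise Exception\n') in _toks
del _toks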
154 154 class IPythonTracebackLexer(DelegatingLexer):
155 155 """
156 156 IPython traceback lexer.
157 157
158 158 For doctests, the tracebacks can be snipped as much as desired with the
159 159 exception of the lines that designate a traceback. For non-syntax error
160 160 tracebacks, this is the line of hyphens. For syntax error tracebacks,
161 161 this is the line which lists the File and line number.
162 162
163 163 """
164 164 # The lexer inherits from DelegatingLexer. The "root" lexer is an
165 165 # appropriate IPython lexer, which depends on the value of the boolean
166 166 # `python3`. First, we parse with the partial IPython traceback lexer.
167 167 # Then, any code marked with the "Other" token is delegated to the root
168 168 # lexer.
169 169 #
170 170 name = 'IPython Traceback'
171 171 aliases = ['ipythontb']
172 172
173 173 def __init__(self, **options):
174 174 self.python3 = get_bool_opt(options, 'python3', False)
175 175 if self.python3:
176 176 self.aliases = ['ipython3tb']
177 177 else:
178 178 self.aliases = ['ipython2tb', 'ipythontb']
179 179
180 180 if self.python3:
181 181 IPyLexer = IPython3Lexer
182 182 else:
183 183 IPyLexer = IPythonLexer
184 184
185 185 DelegatingLexer.__init__(self, IPyLexer,
186 186 IPythonPartialTracebackLexer, **options)
187 187
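# Usage sketch for the delegation: on a full traceback, the frame's source
# line is re-lexed as Python, so 'raise' comes out as a Keyword token rather
# than staying Other.
_tb_demo = (u'---------------------------------------------------------\n'
            u'Exception                 Traceback (most recent call last)\n'
            u'<ipython-input-1-fca2ab0ca76b> in <module>()\n'
            u'----> 1 raise Exception\n'
            u'\n'
            u'Exception: \n')
assert (Keyword, u'raise') in list(IPythonTracebackLexer().get_tokens(_tb_demo))
del _tb_demo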
188 188 @skip_doctest
189 189 class IPythonConsoleLexer(Lexer):
190 190 """
191 191 An IPython console lexer for IPython code-blocks and doctests, such as:
192 192
193 193 .. code-block:: rst
194 194
195 195 .. code-block:: ipythonconsole
196 196
197 197 In [1]: a = 'foo'
198 198
199 199 In [2]: a
200 200 Out[2]: 'foo'
201 201
202 202 In [3]: print a
203 203 foo
204 204
205 205 In [4]: 1 / 0
206 206
207 207
208 208 Support is also provided for IPython exceptions:
209 209
210 210 .. code-block:: rst
211 211
212 212 .. code-block:: ipythonconsole
213 213
214 214 In [1]: raise Exception
215 215
216 216 ---------------------------------------------------------------------------
217 217 Exception Traceback (most recent call last)
218 218 <ipython-input-1-fca2ab0ca76b> in <module>()
219 219 ----> 1 raise Exception
220 220
221 221 Exception:
222 222
223 223 """
224 224 name = 'IPython console session'
225 225 aliases = ['ipythonconsole']
226 226 mimetypes = ['text/x-ipython-console']
227 227
228 228 # The regexps used to determine what is input and what is output.
229 229 # The default prompts for IPython are:
230 230 #
231 231 #    c.PromptManager.in_template  = 'In [\#]: '
232 232 #    c.PromptManager.in2_template = '   .\D.: '
233 233 #    c.PromptManager.out_template = 'Out[\#]: '
234 234 #
235 235 in1_regex = r'In \[[0-9]+\]: '
236 236 in2_regex = r'   \.\.+\.: '
237 237 out_regex = r'Out\[[0-9]+\]: '
238 238
239 239 #: The regex to determine when a traceback starts.
240 240 ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')
241 241
242 242 def __init__(self, **options):
243 243 """Initialize the IPython console lexer.
244 244
245 245 Parameters
246 246 ----------
247 247 python3 : bool
248 248 If `True`, then the console inputs are parsed using a Python 3
249 249 lexer. Otherwise, they are parsed using a Python 2 lexer.
250 250 in1_regex : RegexObject
251 251 The compiled regular expression used to detect the start
252 252 of inputs. Although the IPython configuration setting may have a
253 253 trailing whitespace, do not include it in the regex. If `None`,
254 254 then the default input prompt is assumed.
255 255 in2_regex : RegexObject
256 256 The compiled regular expression used to detect the continuation
257 257 of inputs. Although the IPython configuration setting may have a
258 258 trailing whitespace, do not include it in the regex. If `None`,
259 259 then the default input prompt is assumed.
260 260 out_regex : RegexObject
261 261 The compiled regular expression used to detect outputs. If `None`,
262 262 then the default output prompt is assumed.
263 263
264 264 """
265 265 self.python3 = get_bool_opt(options, 'python3', False)
266 266 if self.python3:
267 267 self.aliases = ['ipython3console']
268 268 else:
269 269 self.aliases = ['ipython2console', 'ipythonconsole']
270 270
271 271 in1_regex = options.get('in1_regex', self.in1_regex)
272 272 in2_regex = options.get('in2_regex', self.in2_regex)
273 273 out_regex = options.get('out_regex', self.out_regex)
274 274
275 275 # So that we can work with input and output prompts which have been
276 276 # rstrip'd (possibly by editors), we also need rstrip'd variants. If
277 277 # we do not do this, then such prompts will be tagged as 'output'.
278 278 # The reason we can't just use the rstrip'd variants instead is that
279 279 # we want any whitespace associated with the prompt to be inserted
280 280 # with the token. This allows formatted code to be modified so as to
281 281 # hide the appearance of prompts, with the whitespace included. One
282 282 # example is copybutton.js from the standard library Python docs.
283 283 in1_regex_rstrip = in1_regex.rstrip() + '\n'
284 284 in2_regex_rstrip = in2_regex.rstrip() + '\n'
285 285 out_regex_rstrip = out_regex.rstrip() + '\n'
286 286
287 287 # Compile and save them all.
288 288 attrs = ['in1_regex', 'in2_regex', 'out_regex',
289 289 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
290 290 for attr in attrs:
291 291 self.__setattr__(attr, re.compile(locals()[attr]))
292 292
293 293 Lexer.__init__(self, **options)
294 294
295 295 if self.python3:
296 296 pylexer = IPython3Lexer
297 297 tblexer = IPythonTracebackLexer
298 298 else:
299 299 pylexer = IPythonLexer
300 300 tblexer = IPythonTracebackLexer  # same class either way; it reads 'python3' from **options itself
301 301
302 302 self.pylexer = pylexer(**options)
303 303 self.tblexer = tblexer(**options)
304 304
305 305 self.reset()
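        # Construction sketch (comment form, as we are inside __init__):
        # every knob arrives through ``options``; e.g., with hypothetical
        # doctest-style prompts:
        #
        #     IPythonConsoleLexer(python3=True,
        #                         in1_regex=r'>>> ',
        #                         in2_regex=r'\.\.\. ')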
306 306
307 307 def reset(self):
308 308 self.mode = 'output'
309 309 self.index = 0
310 310 self.buffer = u''
311 311 self.insertions = []
312 312
313 313 def buffered_tokens(self):
314 314 """
315 315 Generator of unprocessed tokens after doing insertions and before
316 316 changing to a new state.
317 317
318 318 """
319 319 if self.mode == 'output':
320 320 tokens = [(0, Generic.Output, self.buffer)]
321 321 elif self.mode == 'input':
322 322 tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
323 323 else: # traceback
324 324 tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
325 325
326 326 for i, t, v in do_insertions(self.insertions, tokens):
327 327 # All token indexes are relative to the buffer.
328 328 yield self.index + i, t, v
329 329
330 330 # Clear it all
331 331 self.index += len(self.buffer)
332 332 self.buffer = u''
333 333 self.insertions = []
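    # ``do_insertions`` (imported from pygments.lexer above) weaves the
    # prompt tokens stored in ``self.insertions`` back between the tokens
    # lexed from the buffer. A standalone sketch of that helper:
    #
    #     ins = [(0, [(0, Generic.Prompt, u'In [1]: ')])]
    #     list(do_insertions(ins,
    #                        PythonLexer().get_tokens_unprocessed(u'a = 1\n')))
    #     # -> the prompt triple first, then the triples for ``a = 1``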
334 334
335 335 def get_mci(self, line):
336 336 """
337 337 Parses the line and returns a 3-tuple: (mode, code, insertion).
338 338
339 339 `mode` is the next mode (or state) of the lexer, and is always equal
340 340 to 'input', 'output', or 'tb'.
341 341
342 342 `code` is a portion of the line that should be added to the buffer
343 343 corresponding to the next mode and eventually lexed by another lexer.
344 344 For example, `code` could be Python code if `mode` were 'input'.
345 345
346 346 `insertion` is a 3-tuple (index, token, text) representing an
347 347 unprocessed "token" that will be inserted into the stream of tokens
348 348 that are created from the buffer once we change modes. This is usually
349 349 the input or output prompt.
350 350
351 351 In general, the next mode depends on current mode and on the contents
352 352 of `line`.
353 353
354 354 """
355 355 # To reduce the number of regex match checks, we have multiple
356 356 # 'if' blocks instead of 'if-elif' blocks.
357 357
358 358 # Check for possible end of input
359 359 in2_match = self.in2_regex.match(line)
360 360 in2_match_rstrip = self.in2_regex_rstrip.match(line)
361 361 if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
362 362 in2_match_rstrip:
363 363 end_input = True
364 364 else:
365 365 end_input = False
366 366 if end_input and self.mode != 'tb':
367 367 # Only look for an end of input when not in tb mode.
368 368 # An ellipsis could appear within the traceback.
369 369 mode = 'output'
370 370 code = u''
371 371 insertion = (0, Generic.Prompt, line)
372 372 return mode, code, insertion
373 373
374 374 # Check for output prompt
375 375 out_match = self.out_regex.match(line)
376 376 out_match_rstrip = self.out_regex_rstrip.match(line)
377 377 if out_match or out_match_rstrip:
378 378 mode = 'output'
379 379 if out_match:
380 380 idx = out_match.end()
381 381 else:
382 382 idx = out_match_rstrip.end()
383 383 code = line[idx:]
384 384 # Use the 'heading' token for output. We cannot use Generic.Error
385 385 # since it would conflict with exceptions.
386 386 insertion = (0, Generic.Heading, line[:idx])
387 387 return mode, code, insertion
388 388
389 389
390 390 # Check for input or continuation prompt (non-stripped version)
391 391 in1_match = self.in1_regex.match(line)
392 392 if in1_match or (in2_match and self.mode != 'tb'):
393 393 # New input or when not in tb, continued input.
394 394 # We do not check for continued input when in tb since it is
395 395 # allowable to replace a long stack with an ellipsis.
396 396 mode = 'input'
397 397 if in1_match:
398 398 idx = in1_match.end()
399 399 else: # in2_match
400 400 idx = in2_match.end()
401 401 code = line[idx:]
402 402 insertion = (0, Generic.Prompt, line[:idx])
403 403 return mode, code, insertion
404 404
405 405 # Check for input or continuation prompt (stripped version)
406 406 in1_match_rstrip = self.in1_regex_rstrip.match(line)
407 407 if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
408 408 # New input or when not in tb, continued input.
409 409 # We do not check for continued input when in tb since it is
410 410 # allowable to replace a long stack with an ellipsis.
411 411 mode = 'input'
412 412 if in1_match_rstrip:
413 413 idx = in1_match_rstrip.end()
414 414 else: # in2_match_rstrip
415 415 idx = in2_match_rstrip.end()
416 416 code = line[idx:]
417 417 insertion = (0, Generic.Prompt, line[:idx])
418 418 return mode, code, insertion
419 419
420 420 # Check for traceback
421 421 if self.ipytb_start.match(line):
422 422 mode = 'tb'
423 423 code = line
424 424 insertion = None
425 425 return mode, code, insertion
426 426
427 427 # All other stuff...
428 428 if self.mode in ('input', 'output'):
429 429 # We assume all other text is output. Multiline input that
430 430 # does not use the continuation marker cannot be detected.
431 431 # For example, the 3 in the following is clearly output:
432 432 #
433 433 # In [1]: print 3
434 434 # 3
435 435 #
436 436 # But the following second line is part of the input:
437 437 #
438 438 # In [2]: while True:
439 439 # print True
440 440 #
441 441 # In both cases, the 2nd line will be 'output'.
442 442 #
443 443 mode = 'output'
444 444 else:
445 445 mode = 'tb'
446 446
447 447 code = line
448 448 insertion = None
449 449
450 450 return mode, code, insertion
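    # Tracing sketch for ``get_mci`` (``get_tokens_unprocessed`` below owns
    # the ``self.mode`` bookkeeping, so we set it by hand here):
    #
    #     console = IPythonConsoleLexer()
    #     console.reset()
    #     console.get_mci(u'In [1]: a = 1\n')
    #     # -> ('input', u'a = 1\n', (0, Generic.Prompt, u'In [1]: '))
    #     console.mode = 'input'
    #     console.get_mci(u'3\n')
    #     # -> ('output', u'3\n', None)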
451 451
452 452 def get_tokens_unprocessed(self, text):
453 453 self.reset()
454 454 for match in line_re.finditer(text):
455 455 line = match.group()
456 456 mode, code, insertion = self.get_mci(line)
457 457
458 458 if mode != self.mode:
459 459 # Yield buffered tokens before transitioning to new mode.
460 460 for token in self.buffered_tokens():
461 461 yield token
462 462 self.mode = mode
463 463
464 464 if insertion:
465 465 self.insertions.append((len(self.buffer), [insertion]))
466 466 self.buffer += code
467 467 else:
468 468 for token in self.buffered_tokens():
469 469 yield token
470 470
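# End-to-end usage sketch (assumes Pygments is installed): render a short
# session with any Pygments formatter.
from pygments import highlight
from pygments.formatters import TerminalFormatter

_session = (u"In [1]: a = 'foo'\n"
            u"\n"
            u"In [2]: a\n"
            u"Out[2]: 'foo'\n")
assert u'foo' in highlight(_session, IPythonConsoleLexer(), TerminalFormatter())
del _session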
471 471 class IPyLexer(Lexer):
472 472 """
473 473 Primary lexer for all IPython-like code.
474 474
475 475 This is a simple helper lexer. If the first line of the text begins with
476 476 "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
477 477 lexer. If not, then the entire text is parsed with an IPython lexer.
478 478
479 479 The goal is to reduce the number of lexers that are registered
480 480 with Pygments.
481 481
482 482 """
483 483 name = 'IPy session'
484 484 aliases = ['ipy']
485 485
486 486 def __init__(self, **options):
487 487 self.python3 = get_bool_opt(options, 'python3', False)
488 488 if self.python3:
489 489 self.aliases = ['ipy3']
490 490 else:
491 491 self.aliases = ['ipy2', 'ipy']
492 492
493 493 Lexer.__init__(self, **options)
494 494
495 495 self.IPythonLexer = IPythonLexer(**options)
496 496 self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
497 497
498 498 def get_tokens_unprocessed(self, text):
499 499 # Search for the input prompt anywhere...this allows code blocks to
500 500 # begin with comments as well.
501 501 if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
502 502 lex = self.IPythonConsoleLexer
503 503 else:
504 504 lex = self.IPythonLexer
505 505 for token in lex.get_tokens_unprocessed(text):
506 506 yield token
507 507
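Since the IPyLexer docstring above calls it the one lexer that typically
needs explicit registration with Pygments, here is a sketch of how a Sphinx
project might wire it up in conf.py (`app` is the Sphinx application object;
older Sphinx versions accept a lexer instance for `add_lexer`):

    from IPython.lib.lexers import IPyLexer

    def setup(app):
        # Make ``.. code-block:: ipy`` available to the project's documents.
        app.add_lexer('ipy', IPyLexer(python3=True))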