Backport PR #14022 on branch 8.12.x (Fix failing docs build.) (#14034)...
Matthias Bussonnier
r28234:41c5449d merge
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import (
    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']


def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)

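# A minimal usage sketch (an illustrative addition, not part of the original
# module): the classes built above are ordinary Pygments lexers, so they work
# with the standard ``pygments.highlight`` API, e.g.:
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     html = highlight("%timeit x = 1\n", IPython3Lexer(), HtmlFormatter())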

class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired, with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
197 """
198 A subclass of `DelegatingLexer` which delegates to the appropriate to either IPyLexer,
199 IPythonPartialTracebackLexer.
200 """
201 # note we need a __init__ doc, as otherwise it inherits the doc from the super class
202 # which will fail the documentation build as it references section of the pygments docs that
203 # do not exists when building IPython's docs.
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

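# Illustrative usage sketch (not from the original source): because the
# traceback lexer delegates the Python source lines to an IPython lexer, a
# captured traceback can be highlighted directly; the input string below is
# hypothetical:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     print(highlight(some_traceback_text,
#                     IPythonTracebackLexer(python3=True),
#                     TerminalFormatter()))
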
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print(a)
            foo


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception
            Traceback (most recent call last):
            ...
            Exception

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in = 'In [#]: '
    #     continuation = '   .D.: '
    #     template = 'Out[#]: '
    #
    # where '#' is the 'prompt number' or 'execution count' and 'D' is a
    # number of dots matching the width of the execution count.
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression used to detect the start of inputs.
            Although the IPython configuration setting may have a trailing
            whitespace, do not include it in the regex. If `None`, then the
            default input prompt is assumed.
        in2_regex : str
            The regular expression used to detect the continuation of inputs.
            Although the IPython configuration setting may have a trailing
            whitespace, do not include it in the regex. If `None`, then the
            default input prompt is assumed.
        out_regex : str
            The regular expression used to detect outputs. If `None`, then
            the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors), we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

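# A hedged end-to-end sketch (illustrative, not part of the original module):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     session = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
#     html = highlight(session, IPythonConsoleLexer(python3=True),
#                      HtmlFormatter())
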
class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
513 """
514 Create a new IPyLexer instance which dispatch to either an
515 IPythonCOnsoleLexer (if In prompts are present) or and IPythonLexer (if
516 In prompts are not present).
517 """
518 # init docstring is necessary for docs not to fail to build do to parent
519 # docs referenceing a section in pygments docs.
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

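# Registration sketch (an illustrative addition): as the module docstring
# notes, IPyLexer is typically the only lexer that needs to be registered
# explicitly. In a Sphinx project this is commonly done from conf.py or an
# extension's setup(); the import path is assumed here:
#
#     def setup(app):
#         from IPython.lib.lexers import IPyLexer
#         app.add_lexer('ipy', IPyLexer)
#
# Depending on the Sphinx version, add_lexer may require an instance
# (IPyLexer()) rather than the class.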