Clean up aliases for lexers.
chebee7i
@@ -1,494 +1,507 @@
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Defines a variety of Pygments lexers for highlighting IPython code.
4 4
5 5 This includes:
6 6
7 7 IPythonLexer
8 8 IPython3Lexer
9 9 Lexers for pure IPython (python + magic/shell commands)
10 10
11 11 IPythonPartialTracebackLexer
12 12 IPythonTracebackLexer
13 13 Supports 2.x and 3.x via keyword `python3`. The partial traceback
14 14 lexer reads everything but the Python code appearing in a traceback.
15 15 The full lexer combines the partial lexer with an IPython lexer.
16 16
17 17 IPythonConsoleLexer
18 18 A lexer for IPython console sessions, with support for tracebacks.
19 19
20 20 IPyLexer
21 21 A friendly lexer which examines the first line of text and, from it,
22 22 decides whether to use an IPython lexer or an IPython console lexer.
23 23 This is probably the only lexer that needs to be explicitly added
24 24 to Pygments.
25 25
26 26 """
27 27 #-----------------------------------------------------------------------------
28 28 # Copyright (c) 2013, the IPython Development Team.
29 29 #
30 30 # Distributed under the terms of the Modified BSD License.
31 31 #
32 32 # The full license is in the file COPYING.txt, distributed with this software.
33 33 #-----------------------------------------------------------------------------
34 34
35 35 # Standard library
36 36 import re
37 37
38 38 # Third party
39 39 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
40 40 from pygments.lexer import (
41 41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
42 42 )
43 43 from pygments.token import (
44 44 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
45 45 )
46 46 from pygments.util import get_bool_opt
47 47
48 48 # Local
49 49 from IPython.testing.skipdoctest import skip_doctest
50 50
51 51 line_re = re.compile('.*?\n')
52 52
53 53 ipython_tokens = [
54 54 (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
55 55 using(BashLexer), Text)),
56 56 (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
57 57 (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
58 58 ]
59 59
60 60 def build_ipy_lexer(python3):
61 61 """Builds IPython lexers depending on the value of `python3`.
62 62
63 63 The lexer inherits from an appropriate Python lexer and then adds
64 64 information about IPython-specific keywords (i.e. magic commands,
65 65 shell commands, etc.)
66 66
67 67 Parameters
68 68 ----------
69 69 python3 : bool
70 70 If `True`, then build an IPython lexer from a Python 3 lexer.
71 71
72 72 """
73 73 # It would be nice to have a single IPython lexer class which takes
74 74 # a boolean `python3`. But since there are two Python lexer classes,
75 75 # we will also have two IPython lexer classes.
76 76 if python3:
77 77 PyLexer = Python3Lexer
78 78 clsname = 'IPython3Lexer'
79 79 name = 'IPython3'
80 80 aliases = ['ipython3']
81 81 doc = """IPython3 Lexer"""
82 82 else:
83 83 PyLexer = PythonLexer
84 84 clsname = 'IPythonLexer'
85 85 name = 'IPython'
86 aliases = ['ipython']
86 aliases = ['ipython2', 'ipython']
87 87 doc = """IPython Lexer"""
88 88
89 89 tokens = PyLexer.tokens.copy()
90 90 tokens['root'] = ipython_tokens + tokens['root']
91 91
92 92 attrs = {'name': name, 'aliases': aliases,
93 93 '__doc__': doc, 'tokens': tokens}
94 94
95 95 return type(name, (PyLexer,), attrs)
96 96
97 97
98 98 IPython3Lexer = build_ipy_lexer(python3=True)
99 99 IPythonLexer = build_ipy_lexer(python3=False)
100 100
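For context, a minimal smoke test of the two generated classes (illustrative, outside this patch; it assumes the module is importable as IPython.nbconvert.utils.lexers, matching the relative import in the second file below):

    from IPython.nbconvert.utils.lexers import IPythonLexer
    from pygments.token import Keyword, Operator

    lexer = IPythonLexer()
    tokens = list(lexer.get_tokens(u'%timeit range(10)\n'))
    # The ipython_tokens rules are prepended to 'root', so the '%' is
    # tagged Operator and the magic name Keyword before any Python rule.
    assert (Operator, u'%') in tokens
    assert (Keyword, u'timeit') in tokens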
101 101
102 102 class IPythonPartialTracebackLexer(RegexLexer):
103 103 """
104 104 Partial lexer for IPython tracebacks.
105 105
106 106 Handles all the non-Python output. This works for both Python 2.x and 3.x.
107 107
108 108 """
109 109 name = 'IPython Partial Traceback'
110 110
111 111 tokens = {
112 112 'root': [
113 113 # Tracebacks for syntax errors have a different style.
114 114 # For both types of tracebacks, we mark the first line with
115 115 # Generic.Traceback. For syntax errors, we mark the filename
116 116 # just as we mark the filenames for non-syntax tracebacks.
117 117 #
118 118 # These two regexps define how IPythonConsoleLexer finds a
119 119 # traceback.
120 120 #
121 121 ## Non-syntax traceback
122 122 (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
123 123 ## Syntax traceback
124 124 (r'^( File)(.*)(, line )(\d+\n)',
125 125 bygroups(Generic.Traceback, Name.Namespace,
126 126 Generic.Traceback, Literal.Number.Integer)),
127 127
128 128 # (Exception Identifier)(Whitespace)(Traceback Message)
129 129 (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
130 130 bygroups(Name.Exception, Generic.Whitespace, Text)),
131 131 # (Module/Filename)(Text)(Callee)(Function Signature)
132 132 # Better options for callee and function signature?
133 133 (r'(.*)( in )(.*)(\(.*\)\n)',
134 134 bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
135 135 # Regular line: (Whitespace)(Line Number)(Python Code)
136 136 (r'(\s*?)(\d+)(.*?\n)',
137 137 bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
138 138 # Emphasized line: (Arrow)(Line Number)(Python Code)
139 139 # Using Exception token so arrow color matches the Exception.
140 140 (r'(-*>?\s?)(\d+)(.*?\n)',
141 141 bygroups(Name.Exception, Literal.Number.Integer, Other)),
142 142 # (Exception Identifier)(Message)
143 143 (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
144 144 bygroups(Name.Exception, Text)),
145 145 # Tag everything else as Other, will be handled later.
146 146 (r'.*\n', Other),
147 147 ],
148 148 }
149 149
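To see the division of labor (illustrative, outside this patch): the partial lexer tags what it recognizes and leaves everything else as Other, which the delegating lexer defined next hands to an IPython lexer.

    from IPython.nbconvert.utils.lexers import IPythonPartialTracebackLexer
    from pygments.token import Name

    plex = IPythonPartialTracebackLexer()
    line = u'ZeroDivisionError    Traceback (most recent call last)\n'
    # The exception identifier should match the Name.Exception rule above.
    toks = list(plex.get_tokens(line))
    assert toks[0] == (Name.Exception, u'ZeroDivisionError')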
150 150
151 151 class IPythonTracebackLexer(DelegatingLexer):
152 152 """
153 153 IPython traceback lexer.
154 154
155 155 For doctests, the tracebacks can be snipped as much as desired, with the
156 156 exception of the lines that designate a traceback. For non-syntax error
157 157 tracebacks, this is the line of hyphens. For syntax error tracebacks,
158 158 this is the line which lists the File and line number.
159 159
160 160 """
161 161 # The lexer inherits from DelegatingLexer. The "root" lexer is an
162 162 # appropriate IPython lexer, which depends on the value of the boolean
163 163 # `python3`. First, we parse with the partial IPython traceback lexer.
164 164 # Then, any code marked with the "Other" token is delegated to the root
165 165 # lexer.
166 166 #
167 167 name = 'IPython Traceback'
168 168 aliases = ['ipythontb']
169 169
170 170 def __init__(self, **options):
171 171 self.python3 = get_bool_opt(options, 'python3', False)
172 if self.python3:
173 self.aliases = ['ipythontb3']
174 else:
175 self.aliases = ['ipythontb2', 'ipythontb']
172 176
173 177 if self.python3:
174 178 IPyLexer = IPython3Lexer
175 179 else:
176 180 IPyLexer = IPythonLexer
177 181
178 182 DelegatingLexer.__init__(self, IPyLexer,
179 183 IPythonPartialTracebackLexer, **options)
180 184
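The combined lexer can be exercised on the sample traceback reproduced in the IPythonConsoleLexer docstring below (an illustrative sketch; the formatter choice is arbitrary):

    from IPython.nbconvert.utils.lexers import IPythonTracebackLexer
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    tb = u'\n'.join([
        u'-' * 75,
        u'Exception                                 Traceback (most recent call last)',
        u'<ipython-input-1-fca2ab0ca76b> in <module>()',
        u'----> 1 raise Exception',
        u'',
        u'Exception:',
    ]) + u'\n'
    print(highlight(tb, IPythonTracebackLexer(), TerminalFormatter()))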
181 185 @skip_doctest
182 186 class IPythonConsoleLexer(Lexer):
183 187 """
184 188 An IPython console lexer for IPython code-blocks and doctests, such as:
185 189
186 190 .. code-block:: rst
187 191
188 192 .. code-block:: ipythoncon
189 193
190 194 In [1]: a = 'foo'
191 195
192 196 In [2]: a
193 197 Out[2]: 'foo'
194 198
195 199 In [3]: print a
196 200 foo
197 201
198 202 In [4]: 1 / 0
199 203
200 204
201 205 Support is also provided for IPython exceptions:
202 206
203 207 .. code-block:: rst
204 208
205 209 .. code-block:: ipythoncon
206 210
207 211 In [1]: raise Exception
208 212 ---------------------------------------------------------------------------
209 213 Exception Traceback (most recent call last)
210 214 <ipython-input-1-fca2ab0ca76b> in <module>()
211 215 ----> 1 raise Exception
212 216
213 217 Exception:
214 218
215 219 """
216 220 name = 'IPython console session'
217 221 aliases = ['ipythoncon']
218 222 mimetypes = ['text/x-ipython-console']
219 223
220 224 # The regexps used to determine what is input and what is output.
221 225 # The default prompts for IPython are:
222 226 #
223 227 # c.PromptManager.in_template = 'In [\#]: '
224 228 # c.PromptManager.in2_template = '   .\D.: '
225 229 # c.PromptManager.out_template = 'Out[\#]: '
226 230 #
227 231 in1_regex = r'In \[[0-9]+\]: '
228 232 in2_regex = r'   \.\.+\.: '
229 233 out_regex = r'Out\[[0-9]+\]: '
230 234
231 235 #: The regex to determine when a traceback starts.
232 236 ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')
233 237
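The class-level defaults can be sanity-checked directly; the instance versions are compiled in __init__ below (illustrative, outside this patch):

    import re
    from IPython.nbconvert.utils.lexers import IPythonConsoleLexer

    assert re.match(IPythonConsoleLexer.in1_regex, u'In [42]: x = 1')
    assert re.match(IPythonConsoleLexer.out_regex, u'Out[42]: 1')
    # A run of hyphens marks the start of a non-syntax traceback.
    assert IPythonConsoleLexer.ipytb_start.match(u'-' * 75 + u'\n')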
234 238 def __init__(self, **options):
235 239 """Initialize the IPython console lexer.
236 240
237 241 Parameters
238 242 ----------
239 243 python3 : bool
240 244 If `True`, then the console inputs are parsed using a Python 3
241 245 lexer. Otherwise, they are parsed using a Python 2 lexer.
242 246 in1_regex : RegexObject
243 247 The compiled regular expression used to detect the start
244 248 of inputs. Although the IPython configuration setting may have a
245 249 trailing whitespace, do not include it in the regex. If `None`,
246 250 then the default input prompt is assumed.
247 251 in2_regex : RegexObject
248 252 The compiled regular expression used to detect the continuation
249 253 of inputs. Although the IPython configuration setting may have a
250 254 trailing whitespace, do not include it in the regex. If `None`,
251 255 then the default input prompt is assumed.
252 256 out_regex : RegexObject
253 257 The compiled regular expression used to detect outputs. If `None`,
254 258 then the default output prompt is assumed.
255 259
256 260 """
257 261 self.python3 = get_bool_opt(options, 'python3', False)
262 if self.python3:
263 self.aliases = ['ipythoncon3']
264 else:
265 self.aliases = ['ipythoncon2', 'ipythoncon']
258 266
259 267 in1_regex = options.get('in1_regex', self.in1_regex)
260 268 in2_regex = options.get('in2_regex', self.in2_regex)
261 269 out_regex = options.get('out_regex', self.out_regex)
262 270
263 271 # So that we can work with input and output prompts which have been
264 272 # rstrip'd (possibly by editors) we also need rstrip'd variants. If
265 273 # we do not do this, then such prompts will be tagged as 'output'.
266 274 # The reason we can't just use the rstrip'd variants instead is because
267 275 # we want any whitespace associated with the prompt to be inserted
268 276 # with the token. This allows formatted code to be modified so as to hide
269 277 # the appearance of prompts, with the whitespace included. One example
270 278 # use of this is in copybutton.js from the standard lib Python docs.
271 279 in1_regex_rstrip = in1_regex.rstrip() + '\n'
272 280 in2_regex_rstrip = in2_regex.rstrip() + '\n'
273 281 out_regex_rstrip = out_regex.rstrip() + '\n'
274 282
275 283 # Compile and save them all.
276 284 attrs = ['in1_regex', 'in2_regex', 'out_regex',
277 285 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
278 286 for attr in attrs:
279 287 self.__setattr__(attr, re.compile(locals()[attr]))
280 288
281 289 Lexer.__init__(self, **options)
282 290
283 291 if self.python3:
284 292 pylexer = IPython3Lexer
285 293 tblexer = IPythonTracebackLexer
286 294 else:
287 295 pylexer = IPythonLexer
288 296 tblexer = IPythonTracebackLexer
289 297
290 298 self.pylexer = pylexer(**options)
291 299 self.tblexer = tblexer(**options)
292 300
293 301 self.reset()
294 302
295 303 def reset(self):
296 304 self.mode = 'output'
297 305 self.index = 0
298 306 self.buffer = u''
299 307 self.insertions = []
300 308
301 309 def buffered_tokens(self):
302 310 """
303 311 Generator of unprocessed tokens after doing insertions and before
304 312 changing to a new state.
305 313
306 314 """
307 315 if self.mode == 'output':
308 316 tokens = [(0, Generic.Output, self.buffer)]
309 317 elif self.mode == 'input':
310 318 tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
311 319 else: # traceback
312 320 tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
313 321
314 322 for i, t, v in do_insertions(self.insertions, tokens):
315 323 # All token indexes are relative to the buffer.
316 324 yield self.index + i, t, v
317 325
318 326 # Clear it all
319 327 self.index += len(self.buffer)
320 328 self.buffer = u''
321 329 self.insertions = []
322 330
323 331 def get_mci(self, line):
324 332 """
325 333 Parses the line and returns a 3-tuple: (mode, code, insertion).
326 334
327 335 `mode` is the next mode (or state) of the lexer, and is always equal
328 336 to 'input', 'output', or 'tb'.
329 337
330 338 `code` is a portion of the line that should be added to the buffer
331 339 corresponding to the next mode and eventually lexed by another lexer.
332 340 For example, `code` could be Python code if `mode` were 'input'.
333 341
334 342 `insertion` is a 3-tuple (index, token, text) representing an
335 343 unprocessed "token" that will be inserted into the stream of tokens
336 344 that are created from the buffer once we change modes. This is usually
337 345 the input or output prompt.
338 346
339 347 In general, the next mode depends on current mode and on the contents
340 348 of `line`.
341 349
342 350 """
343 351 # To reduce the number of regex match checks, we have multiple
344 352 # 'if' blocks instead of 'if-elif' blocks.
345 353
346 354 ### Check for possible end of input
347 355 ###
348 356 in2_match = self.in2_regex.match(line)
349 357 in2_match_rstrip = self.in2_regex_rstrip.match(line)
350 358 if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
351 359 in2_match_rstrip:
352 360 end_input = True
353 361 else:
354 362 end_input = False
355 363 if end_input and self.mode != 'tb':
356 364 # Only look for an end of input when not in tb mode.
357 365 # An ellipsis could appear within the traceback.
358 366 mode = 'output'
359 367 code = u''
360 368 insertion = (0, Generic.Prompt, line)
361 369 return mode, code, insertion
362 370
363 371 ### Check for output prompt
364 372 ###
365 373 out_match = self.out_regex.match(line)
366 374 out_match_rstrip = self.out_regex_rstrip.match(line)
367 375 if out_match or out_match_rstrip:
368 376 mode = 'output'
369 377 if out_match:
370 378 idx = out_match.end()
371 379 else:
372 380 idx = out_match_rstrip.end()
373 381 code = line[idx:]
374 382 # Use the 'heading' token for output. We cannot use Generic.Error
375 383 # since it would conflict with exceptions.
376 384 insertion = (0, Generic.Heading, line[:idx])
377 385 return mode, code, insertion
378 386
379 387
380 388 ### Check for input or continuation prompt (non stripped version)
381 389 ###
382 390 in1_match = self.in1_regex.match(line)
383 391 if in1_match or (in2_match and self.mode != 'tb'):
384 392 # New input or when not in tb, continued input.
385 393 # We do not check for continued input when in tb since it is
386 394 # allowable to replace a long stack with an ellipsis.
387 395 mode = 'input'
388 396 if in1_match:
389 397 idx = in1_match.end()
390 398 else: # in2_match
391 399 idx = in2_match.end()
392 400 code = line[idx:]
393 401 insertion = (0, Generic.Prompt, line[:idx])
394 402 return mode, code, insertion
395 403
396 404 ### Check for input or continuation prompt (stripped version)
397 405 ###
398 406 in1_match_rstrip = self.in1_regex_rstrip.match(line)
399 407 if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
400 408 # New input or when not in tb, continued input.
401 409 # We do not check for continued input when in tb since it is
402 410 # allowable to replace a long stack with an ellipsis.
403 411 mode = 'input'
404 412 if in1_match_rstrip:
405 413 idx = in1_match_rstrip.end()
406 414 else: # in2_match_rstrip
407 415 idx = in2_match_rstrip.end()
408 416 code = line[idx:]
409 417 insertion = (0, Generic.Prompt, line[:idx])
410 418 return mode, code, insertion
411 419
412 420 ### Check for traceback
413 421 ###
414 422 if self.ipytb_start.match(line):
415 423 mode = 'tb'
416 424 code = line
417 425 insertion = None
418 426 return mode, code, insertion
419 427
420 428 ### All other stuff...
421 429 ###
422 430 if self.mode in ('input', 'output'):
423 431 # We assume all other text is output. Multiline input that
424 432 # does not use the continuation marker cannot be detected.
425 433 # For example, the 3 in the following is clearly output:
426 434 #
427 435 # In [1]: print 3
428 436 # 3
429 437 #
430 438 # But the following second line is part of the input:
431 439 #
432 440 # In [2]: while True:
433 441 # print True
434 442 #
435 443 # In both cases, the 2nd line will be 'output'.
436 444 #
437 445 mode = 'output'
438 446 else:
439 447 mode = 'tb'
440 448
441 449 code = line
442 450 insertion = None
443 451
444 452 return mode, code, insertion
445 453
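A worked example of the 3-tuple (illustrative; a fresh lexer starts in 'output' mode, and the matched prompt becomes the insertion):

    from IPython.nbconvert.utils.lexers import IPythonConsoleLexer
    from pygments.token import Generic

    lexer = IPythonConsoleLexer()
    mode, code, insertion = lexer.get_mci(u'In [1]: a = 1\n')
    assert mode == 'input'
    assert code == u'a = 1\n'
    assert insertion == (0, Generic.Prompt, u'In [1]: ')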
446 454 def get_tokens_unprocessed(self, text):
447 455 self.reset()
448 456 for match in line_re.finditer(text):
449 457 line = match.group()
450 458 mode, code, insertion = self.get_mci(line)
451 459
452 460 if mode != self.mode:
453 461 # Yield buffered tokens before transitioning to new mode.
454 462 for token in self.buffered_tokens():
455 463 yield token
456 464 self.mode = mode
457 465
458 466 if insertion:
459 467 self.insertions.append((len(self.buffer), [insertion]))
460 468 self.buffer += code
461 469 else:
462 470 for token in self.buffered_tokens():
463 471 yield token
464 472
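End to end, the console lexer behaves like any other Pygments lexer; the session text here comes from the class docstring (illustrative, outside this patch):

    from IPython.nbconvert.utils.lexers import IPythonConsoleLexer
    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    session = u"In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
    print(highlight(session, IPythonConsoleLexer(), HtmlFormatter()))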
465 473 class IPyLexer(Lexer):
466 474 """
467 475 Primary lexer for all IPython-like code.
468 476
469 477 This is a simple helper lexer. If the first line of the text begins with
470 478 "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
471 479 lexer. If not, then the entire text is parsed with an IPython lexer.
472 480
473 481 The goal is to reduce the number of lexers that are registered
474 482 with Pygments.
475 483
476 484 """
477 485 name = 'IPy session'
478 486 aliases = ['ipy']
479 487
480 488 def __init__(self, **options):
481 489 self.python3 = get_bool_opt(options, 'python3', False)
490 if self.python3:
491 self.aliases = ['ipy3']
492 else:
493 self.aliases = ['ipy2', 'ipy']
494
482 495 Lexer.__init__(self, **options)
483 496
484 497 self.IPythonLexer = IPythonLexer(**options)
485 498 self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
486 499
487 500 def get_tokens_unprocessed(self, text):
488 501 if re.match(r'(In \[[0-9]+\]:)', text.strip()):
489 502 lex = self.IPythonConsoleLexer
490 503 else:
491 504 lex = self.IPythonLexer
492 505 for token in lex.get_tokens_unprocessed(text):
493 506 yield token
494 507
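An illustrative check of the dispatch described in the IPyLexer docstring (outside this patch):

    from IPython.nbconvert.utils.lexers import IPyLexer
    from pygments.token import Generic

    lex = IPyLexer(python3=False)
    # A leading input prompt routes the whole text to the console lexer...
    toks = list(lex.get_tokens(u'In [1]: print 3\n3\n'))
    assert (Generic.Prompt, u'In [1]: ') in toks
    # ...while plain code goes to the IPython lexer, which emits no prompts.
    toks = list(lex.get_tokens(u'%pwd\n'))
    assert all(t is not Generic.Prompt for t, v in toks)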
@@ -1,27 +1,27 @@
1 1 """
2 2 reST directive for syntax-highlighting ipython interactive sessions.
3 3
4 4 """
5 5
6 6 from sphinx import highlighting
7 7 from ..nbconvert.utils.lexers import IPyLexer
8 8
9 9 def setup(app):
10 10 """Setup as a sphinx extension."""
11 11
12 12 # This is only a lexer, so adding it to pygments below appears sufficient.
13 13 # But if somebody knows what the right API usage should be to do that via
14 14 # sphinx, by all means fix it here. At least having this setup() function
15 15 # suppresses the sphinx warning we'd get without it.
16 16 pass
17 17
18 18 # Register the extension as a valid pygments lexer.
19 19 # Alternatively, we could register the lexer with pygments instead. This would
20 20 # require using setuptools entrypoints: http://pygments.org/docs/plugins
21 21
22 ipy = IPyLexer(python3=False)
22 ipy2 = IPyLexer(python3=False)
23 23 ipy3 = IPyLexer(python3=True)
24 ipy3.aliases = ['ipy3']
25 24
26 highlighting.lexers['ipython'] = ipy
25 highlighting.lexers['ipython'] = ipy2
26 highlighting.lexers['ipython2'] = ipy2
27 27 highlighting.lexers['ipython3'] = ipy3
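With these registrations in place, a Sphinx project that loads this extension can pick a dialect per code block. A hypothetical conf.py entry (the extension's import path is assumed, not shown in this diff):

    # conf.py: loading the extension runs setup() above and makes
    # 'ipython', 'ipython2' and 'ipython3' valid code-block languages.
    extensions = ['IPython.sphinxext.ipython_console_highlighting']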