##// END OF EJS Templates
More updates to comments and docstrings.
chebee7i -
Show More
@@ -1,483 +1,494 b''
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Defines a variety of Pygments lexers for highlighting IPython code.
4 4
5 5 This includes:
6 6
7 7 IPythonLexer
8 8 IPython3Lexer
9 9 Lexers for pure IPython (python + magic/shell commands)
10 10
11 11 IPythonPartialTracebackLexer
12 12 IPythonTracebackLexer
13 13 Supports 2.x and 3.x via keyword `python3`. The partial traceback
14 14 lexer reads everything but the Python code appearing in a traceback.
15 15 The full lexer combines the partial lexer with an IPython lexer.
16 16
17 17 IPythonConsoleLexer
18 18 A lexer for IPython console sessions, with support for tracebacks.
19 19
20 20 IPyLexer
21 21 A friendly lexer which examines the first line of text and from it,
22 22 decides whether to use an IPython lexer or an IPython console lexer.
23 23 This is probably the only lexer that needs to be explicitly added
24 24 to Pygments.
25 25
26 26 """
27 27 #-----------------------------------------------------------------------------
28 28 # Copyright (c) 2013, the IPython Development Team.
29 29 #
30 30 # Distributed under the terms of the Modified BSD License.
31 31 #
32 32 # The full license is in the file COPYING.txt, distributed with this software.
33 33 #-----------------------------------------------------------------------------
34 34
35 35 # Standard library
36 36 import re
37 37
38 38 # Third party
39 39 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
40 40 from pygments.lexer import (
41 41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
42 42 )
43 43 from pygments.token import (
44 44 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
45 45 )
46 46 from pygments.util import get_bool_opt
47 47
48 48 # Local
49 49 from IPython.testing.skipdoctest import skip_doctest
50 50
# Matches a single line of text including its terminating newline. Text
# after the final newline (an unterminated last line) is not matched.
line_re = re.compile('.*?\n')

# Rules that are prepended to the 'root' state of a Python lexer so that
# IPython-specific syntax is recognized before the ordinary Python rules.
ipython_tokens = [
    # Magic command with arguments:
    #   (% or %%)(magic name)(horizontal whitespace)(arguments)(newline)
    # The arguments are handed to the Bash lexer. Every character is inside
    # a group because `bygroups` only emits text that belongs to a group, and
    # the separator is restricted to spaces/tabs so that a magic without
    # arguments cannot swallow the following line.
    (r'(\%+)(\w+)([ \t]+)(.*)(\n)', bygroups(Operator, Keyword, Text,
                                             using(BashLexer), Text)),
    # Magic command without arguments.
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    # Shell escape: (!)(shell command)(newline)
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
59 59
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.
        Otherwise, build it from the Python 2 lexer.

    Returns
    -------
    lexer : type
        A new lexer class named 'IPython3Lexer' or 'IPythonLexer' whose
        'root' state starts with the rules in `ipython_tokens`.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`.  But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython']
        doc = """IPython Lexer"""

    # Shallow copy: only the 'root' entry is rebound, so the token table of
    # the parent lexer class is left untouched.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    # Create the class under its real class name (`clsname`), not under the
    # human-readable Pygments display name stored in the `name` attribute.
    return type(clsname, (PyLexer,), attrs)
96 96
97 97
98 98 IPython3Lexer = build_ipy_lexer(python3=True)
99 99 IPythonLexer = build_ipy_lexer(python3=False)
100 100
101 101
class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.
    Anything that should be treated as code is tagged with the `Other` token
    so that a delegating lexer can re-lex it.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback.  For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback: optional '^C' followed by a line of
            ## hyphens.
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback.  The 'File' line is indented by two spaces.
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
149 149
150 150
class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # Two-stage lexing via DelegatingLexer: the text is first parsed with
    # the partial IPython traceback lexer, and every piece it tags with the
    # "Other" token is then handed to the "root" lexer.  The root lexer is
    # the IPython lexer selected by the boolean option `python3`.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        """Select the root lexer from the `python3` option and initialize."""
        self.python3 = get_bool_opt(options, 'python3', False)

        root_lexer = IPython3Lexer if self.python3 else IPythonLexer

        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)
180 180
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: raise Exception
            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythoncon']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    # These class attributes are pattern strings.  In `__init__`, compiled
    # versions are stored on the instance under the same names.
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression pattern (a string, not a compiled
            object) used to detect the start of inputs. Include any
            whitespace that belongs to the prompt; a variant without the
            trailing whitespace is derived automatically. If missing or
            `None`, then the default input prompt is assumed.
        in2_regex : str
            The regular expression pattern used to detect the continuation
            of inputs. Same conventions as `in1_regex`. If missing or
            `None`, then the default continuation prompt is assumed.
        out_regex : str
            The regular expression pattern used to detect outputs. Same
            conventions as `in1_regex`. If missing or `None`, then the
            default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)

        patterns = {}
        for key in ('in1_regex', 'in2_regex', 'out_regex'):
            pattern = options.get(key)
            if pattern is None:
                # Nothing has been assigned on the instance yet, so this
                # reads the default pattern string from the class.
                pattern = getattr(self, key)
            patterns[key] = pattern

            # So that we can work with input and output prompts which have
            # been rstrip'd (possibly by editors) we also need rstrip'd
            # variants. If we do not do this, then such prompts will be
            # tagged as 'output'.  The reason we can't just use the rstrip'd
            # variants instead is because we want any whitespace associated
            # with the prompt to be inserted with the token. This allows
            # formatted code to be modified so as to hide the appearance of
            # prompts, with the whitespace included. One example use of this
            # is in copybutton.js from the standard lib Python docs.
            patterns[key + '_rstrip'] = pattern.rstrip() + '\n'

        # Compile and save them all.
        for attr, pattern in patterns.items():
            setattr(self, attr, re.compile(pattern))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
        else:
            pylexer = IPythonLexer

        # The traceback lexer picks its own Python flavor from the `python3`
        # entry of `options`, so the same class serves both cases.
        self.pylexer = pylexer(**options)
        self.tblexer = IPythonTracebackLexer(**options)

        self.reset()

    def reset(self):
        """Return the lexer to its initial state with empty buffers."""
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        The buffer is lexed according to the current mode, the pending
        insertions (prompts) are merged in, and afterwards the buffer and
        the insertions are cleared and `self.index` is advanced.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt. It is `None` when the line carries no
        prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        ### Check for possible end of input
        ###
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        # A continuation prompt with nothing after it ends the input.
        end_input = bool(
            (in2_match and in2_match.group().rstrip() == line.rstrip())
            or in2_match_rstrip
        )
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        ### Check for output prompt
        ###
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output.  We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion

        ### Check for input or continuation prompt (non stripped version)
        ###
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        ### Check for input or continuation prompt (stripped version)
        ###
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match_rstrip
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        ### Check for traceback
        ###
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        ### All other stuff...
        ###
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #     In [1]: print 3
            #     3
            #
            # But the following second line is part of the input:
            #
            #     In [2]: while True:
            #         print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        """
        Yield (index, token, value) tuples for `text`, one line at a time.

        Lines are accumulated in a buffer while the mode stays the same.
        Whenever the mode changes, the buffered text is lexed and its
        tokens are yielded before the new line is buffered.

        """
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        # All lines consumed: flush whatever is still buffered.
        for token in self.buffered_tokens():
            yield token
453 464
class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer.  If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        """Initialize the lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then pure IPython code is parsed using the Python 3
            based IPython lexer. Otherwise, the Python 2 based lexer is
            used. The option is also forwarded to the console lexer.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

        # Honor `python3` for pure IPython code as well.  The attribute name
        # `IPythonLexer` is kept for backwards compatibility.
        if self.python3:
            code_lexer = IPython3Lexer
        else:
            code_lexer = IPythonLexer

        self.IPythonLexer = code_lexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        """Delegate `text` to the console lexer or to the IPython lexer."""
        # Leading whitespace is ignored when looking for the input prompt.
        if re.match(r'(In \[[0-9]+\]:)', text.strip()):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
483 494
@@ -1,58 +1,62 b''
1 1 New IPython Console Lexer
2 2 -------------------------
3 3
4 4 The IPython console lexer has been rewritten and now supports tracebacks
5 5 and customized input/output prompts. An entire suite of lexers is now
6 6 available at :mod:`IPython.nbconvert.utils.lexers`. These include:
7 7
8 8 IPythonLexer
9 9 IPython3Lexer
10 10 Lexers for pure IPython (python + magic/shell commands)
11 11
12 12 IPythonPartialTracebackLexer
13 13 IPythonTracebackLexer
14 14 Supports 2.x and 3.x via the keyword `python3`. The partial traceback
15 15 lexer reads everything but the Python code appearing in a traceback.
16 16 The full lexer combines the partial lexer with an IPython lexer.
17 17
18 18 IPythonConsoleLexer
19 19 A lexer for IPython console sessions, with support for tracebacks.
20 20 Supports 2.x and 3.x via the keyword `python3`.
21 21
22 22 IPyLexer
23 23 A friendly lexer which examines the first line of text and from it,
24 24 decides whether to use an IPython lexer or an IPython console lexer.
25 25 Supports 2.x and 3.x via the keyword `python3`.
26 26
27 27 Previously, the :class:`IPythonConsoleLexer` class was available at
28 28 :mod:`IPython.sphinxext.ipython_console_highlighting`. It was inserted
29 29 into Pygments' list of available lexers under the name `ipython`. It should
30 be mentioned that this name is inaccurate. An IPython console session
30 be mentioned that this name is inaccurate, since an IPython console session
31 31 is not the same as IPython code (which itself is a superset of the Python
32 32 language).
33 33
34 Now, the Sphinx extension inserts two console lexers into Pygment's list of
34 Now, the Sphinx extension inserts two console lexers into Pygments' list of
35 35 available lexers. Both are IPyLexer instances under the names: `ipython` and
36 `ipython3`. As mentioned above, these names are misleading, but they are kept
37 for backwards compatibility and typical usage. If a project needs to make
38 Pygments aware of more than just the IPyLexer class, then one should not
39 make the IPyLexer class available under the name `ipython` and use `ipy` or
40 some other non-conflicting value.
36 `ipython3`. Although the names can be confusing (as mentioned above), their
37 continued use is, in part, to maintain backwards compatibility and to
38 aid typical usage. If a project needs to make Pygments aware of more than just
39 the IPyLexer class, then one should not make the IPyLexer class available under
40 the name `ipython`; instead, use `ipy` or some other non-conflicting value.
41 41
42 Code blocks such as::
42 Code blocks such as:
43
44 .. code-block:: rst
43 45
44 46 .. code-block:: ipython
45 47
46 48 In [1]: 2**2
47 49 Out[1]: 4
48 50
49 51 will continue to work as before, but now, they will also properly highlight
50 tracebacks. For pure IPython code, the same lexer will work::
52 tracebacks. For pure IPython code, the same lexer will also work:
53
54 .. code-block:: rst
51 55
52 56 .. code-block:: ipython
53 57
54 58 x = ''.join(map(str, range(10)))
55 59 !echo $x
56 60
57 61 Since the first line of the block did not begin with a standard IPython console
58 prompt, the entire block is assumed to be IPython code instead.
62 prompt, the entire block is assumed to consist of IPython code instead.
General Comments 0
You need to be logged in to leave comments. Login now