Revert "Cleanup Python 2 compact from Lexers"
Matthias Bussonnier
r28494:56f48b87 revert-14190-1402...
@@ -1,549 +1,540
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Defines a variety of Pygments lexers for highlighting IPython code.
4 4
5 5 This includes:
6 6
7 IPython3Lexer
8 Lexer for pure IPython (python + magic/shell commands)
7 IPythonLexer, IPython3Lexer
8 Lexers for pure IPython (python + magic/shell commands)
9 9
10 10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 The partial traceback lexer reads everything but the Python code
12 appearing in a traceback.
13 The full lexer combines the partial lexer with the IPython3Lexer.
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 lexer reads everything but the Python code appearing in a traceback.
13 The full lexer combines the partial lexer with an IPython lexer.
14 14
15 15 IPythonConsoleLexer
16 16 A lexer for IPython console sessions, with support for tracebacks.
17 17
18 18 IPyLexer
19 19 A friendly lexer which examines the first line of text and from it,
20 20 decides whether to use an IPython lexer or an IPython console lexer.
21 21 This is probably the only lexer that needs to be explicitly added
22 22 to Pygments.
23 23
24 24 """
25 25 #-----------------------------------------------------------------------------
26 26 # Copyright (c) 2013, the IPython Development Team.
27 27 #
28 28 # Distributed under the terms of the Modified BSD License.
29 29 #
30 30 # The full license is in the file COPYING.txt, distributed with this software.
31 31 #-----------------------------------------------------------------------------
32 32
33 33 # Standard library
34 34 import re
35 35
36 36 # Third party
37 37 from pygments.lexers import (
38 BashLexer,
39 HtmlLexer,
40 JavascriptLexer,
41 RubyLexer,
42 PerlLexer,
43 Python3Lexer,
44 TexLexer,
45 )
38 BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
39 Python3Lexer, TexLexer)
46 40 from pygments.lexer import (
47 Lexer,
48 DelegatingLexer,
49 RegexLexer,
50 do_insertions,
51 bygroups,
52 using,
53 inherit,
41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
54 42 )
55 43 from pygments.token import (
56 44 Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
57 45 )
58 46 from pygments.util import get_bool_opt
59 47
60 48 # Local
61 49
62 50 line_re = re.compile('.*?\n')
63 51
64 __all__ = [
65 "IPython3Lexer",
66 "IPythonPartialTracebackLexer",
67 "IPythonTracebackLexer",
68 "IPythonConsoleLexer",
69 "IPyLexer",
70 ]
52 __all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
53 'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
54 'IPythonConsoleLexer', 'IPyLexer']
71 55
72 56
73 class IPython3Lexer(Python3Lexer):
74 """IPython3 Lexer"""
57 def build_ipy_lexer(python3):
58 """Builds IPython lexers depending on the value of `python3`.
75 59
76 name = "IPython3"
77 aliases = ["ipython3"]
60 The lexer inherits from an appropriate Python lexer and then adds
61 information about IPython specific keywords (i.e. magic commands,
62 shell commands, etc.)
78 63
79 tokens = {
80 "root": [
81 (
82 r"(?s)(\s*)(%%capture)([^\n]*\n)(.*)",
83 bygroups(Text, Operator, Text, using(Python3Lexer)),
84 ),
85 (
86 r"(?s)(\s*)(%%debug)([^\n]*\n)(.*)",
87 bygroups(Text, Operator, Text, using(Python3Lexer)),
88 ),
89 (
90 r"(?is)(\s*)(%%html)([^\n]*\n)(.*)",
91 bygroups(Text, Operator, Text, using(HtmlLexer)),
92 ),
93 (
94 r"(?s)(\s*)(%%javascript)([^\n]*\n)(.*)",
95 bygroups(Text, Operator, Text, using(JavascriptLexer)),
96 ),
97 (
98 r"(?s)(\s*)(%%js)([^\n]*\n)(.*)",
99 bygroups(Text, Operator, Text, using(JavascriptLexer)),
100 ),
101 (
102 r"(?s)(\s*)(%%latex)([^\n]*\n)(.*)",
103 bygroups(Text, Operator, Text, using(TexLexer)),
104 ),
105 (
106 r"(?s)(\s*)(%%perl)([^\n]*\n)(.*)",
107 bygroups(Text, Operator, Text, using(PerlLexer)),
108 ),
109 (
110 r"(?s)(\s*)(%%prun)([^\n]*\n)(.*)",
111 bygroups(Text, Operator, Text, using(Python3Lexer)),
112 ),
113 (
114 r"(?s)(\s*)(%%pypy)([^\n]*\n)(.*)",
115 bygroups(Text, Operator, Text, using(Python3Lexer)),
116 ),
117 (
118 r"(?s)(\s*)(%%python)([^\n]*\n)(.*)",
119 bygroups(Text, Operator, Text, using(Python3Lexer)),
120 ),
121 (
122 r"(?s)(\s*)(%%python3)([^\n]*\n)(.*)",
123 bygroups(Text, Operator, Text, using(Python3Lexer)),
124 ),
125 (
126 r"(?s)(\s*)(%%ruby)([^\n]*\n)(.*)",
127 bygroups(Text, Operator, Text, using(RubyLexer)),
128 ),
129 (
130 r"(?s)(\s*)(%%time)([^\n]*\n)(.*)",
131 bygroups(Text, Operator, Text, using(Python3Lexer)),
132 ),
133 (
134 r"(?s)(\s*)(%%timeit)([^\n]*\n)(.*)",
135 bygroups(Text, Operator, Text, using(Python3Lexer)),
136 ),
137 (
138 r"(?s)(\s*)(%%writefile)([^\n]*\n)(.*)",
139 bygroups(Text, Operator, Text, using(Python3Lexer)),
140 ),
141 (
142 r"(?s)(\s*)(%%file)([^\n]*\n)(.*)",
143 bygroups(Text, Operator, Text, using(Python3Lexer)),
144 ),
64 Parameters
65 ----------
66 python3 : bool
67 If `True`, then build an IPython lexer from a Python 3 lexer.
68
69 """
70 # It would be nice to have a single IPython lexer class which takes
71 # a boolean `python3`. But since there are two Python lexer classes,
72 # we will also have two IPython lexer classes.
73 if python3:
74 PyLexer = Python3Lexer
75 name = 'IPython3'
76 aliases = ['ipython3']
77 doc = """IPython3 Lexer"""
78 else:
79 PyLexer = PythonLexer
80 name = 'IPython'
81 aliases = ['ipython2', 'ipython']
82 doc = """IPython Lexer"""
83
84 ipython_tokens = [
85 (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
86 (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
87 (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
88 (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
89 (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
90 (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
91 (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
92 (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
93 (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
94 (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
95 (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
96 (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
97 (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
98 (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
99 (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
100 (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
101 (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
145 102 (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
146 (
147 r"(?s)(^\s*)(%%!)([^\n]*\n)(.*)",
148 bygroups(Text, Operator, Text, using(BashLexer)),
149 ),
103 (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
150 104 (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
151 105 (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
152 (
153 r"(%)(sx|sc|system)(.*)(\n)",
154 bygroups(Operator, Keyword, using(BashLexer), Text),
155 ),
156 (r"(%)(\w+)(.*\n)", bygroups(Operator, Keyword, Text)),
157 (r"^(!!)(.+)(\n)", bygroups(Operator, using(BashLexer), Text)),
158 (r"(!)(?!=)(.+)(\n)", bygroups(Operator, using(BashLexer), Text)),
159 (r"^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)", bygroups(Text, Operator, Text)),
160 (r"(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$", bygroups(Text, Operator, Text)),
161 inherit,
106 (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
107 using(BashLexer), Text)),
108 (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
109 (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
110 (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
111 (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
112 (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
162 113 ]
163 }
114
115 tokens = PyLexer.tokens.copy()
116 tokens['root'] = ipython_tokens + tokens['root']
117
118 attrs = {'name': name, 'aliases': aliases, 'filenames': [],
119 '__doc__': doc, 'tokens': tokens}
120
121 return type(name, (PyLexer,), attrs)
122
123
124 IPython3Lexer = build_ipy_lexer(python3=True)
125 IPythonLexer = build_ipy_lexer(python3=False)
164 126
165 127
166 128 class IPythonPartialTracebackLexer(RegexLexer):
167 129 """
168 130 Partial lexer for IPython tracebacks.
169 131
170 132 Handles all the non-python output.
171 133
172 134 """
173 135 name = 'IPython Partial Traceback'
174 136
175 137 tokens = {
176 138 'root': [
177 139 # Tracebacks for syntax errors have a different style.
178 140 # For both types of tracebacks, we mark the first line with
179 141 # Generic.Traceback. For syntax errors, we mark the filename
180 142 # as we mark the filenames for non-syntax tracebacks.
181 143 #
182 144 # These two regexps define how IPythonConsoleLexer finds a
183 145 # traceback.
184 146 #
185 147 ## Non-syntax traceback
186 148 (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
187 149 ## Syntax traceback
188 150 (r'^( File)(.*)(, line )(\d+\n)',
189 151 bygroups(Generic.Traceback, Name.Namespace,
190 152 Generic.Traceback, Literal.Number.Integer)),
191 153
192 154 # (Exception Identifier)(Whitespace)(Traceback Message)
193 155 (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
194 156 bygroups(Name.Exception, Generic.Whitespace, Text)),
195 157 # (Module/Filename)(Text)(Callee)(Function Signature)
196 158 # Better options for callee and function signature?
197 159 (r'(.*)( in )(.*)(\(.*\)\n)',
198 160 bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
199 161 # Regular line: (Whitespace)(Line Number)(Python Code)
200 162 (r'(\s*?)(\d+)(.*?\n)',
201 163 bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
202 164 # Emphasized line: (Arrow)(Line Number)(Python Code)
203 165 # Using Exception token so arrow color matches the Exception.
204 166 (r'(-*>?\s?)(\d+)(.*?\n)',
205 167 bygroups(Name.Exception, Literal.Number.Integer, Other)),
206 168 # (Exception Identifier)(Message)
207 169 (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
208 170 bygroups(Name.Exception, Text)),
209 171 # Tag everything else as Other, will be handled later.
210 172 (r'.*\n', Other),
211 173 ],
212 174 }
213 175
214 176
215 177 class IPythonTracebackLexer(DelegatingLexer):
216 178 """
217 179 IPython traceback lexer.
218 180
219 181 For doctests, the tracebacks can be snipped as much as desired with the
220 182 exception to the lines that designate a traceback. For non-syntax error
221 183 tracebacks, this is the line of hyphens. For syntax error tracebacks,
222 184 this is the line which lists the File and line number.
223 185
224 186 """
225
226 # The lexer inherits from DelegatingLexer. The "root" lexer is the
227 # IPython3 lexer. First, we parse with the partial IPython traceback lexer.
187 # The lexer inherits from DelegatingLexer. The "root" lexer is an
188 # appropriate IPython lexer, which depends on the value of the boolean
189 # `python3`. First, we parse with the partial IPython traceback lexer.
228 190 # Then, any code marked with the "Other" token is delegated to the root
229 191 # lexer.
230 192 #
231 193 name = 'IPython Traceback'
232 194 aliases = ['ipythontb']
233 195
234 196 def __init__(self, **options):
235 197 """
236 198 A subclass of `DelegatingLexer` which delegates to either the appropriate
237 199 IPython lexer or the IPythonPartialTracebackLexer.
238 200 """
239 201 # note we need an __init__ doc, as otherwise it inherits the doc from the super class
240 202 # which will fail the documentation build as it references sections of the pygments docs that
241 203 # do not exist when building IPython's docs.
204 self.python3 = get_bool_opt(options, 'python3', False)
205 if self.python3:
206 self.aliases = ['ipython3tb']
207 else:
208 self.aliases = ['ipython2tb', 'ipythontb']
242 209
243 super().__init__(IPython3Lexer, IPythonPartialTracebackLexer, **options)
210 if self.python3:
211 IPyLexer = IPython3Lexer
212 else:
213 IPyLexer = IPythonLexer
244 214
215 DelegatingLexer.__init__(self, IPyLexer,
216 IPythonPartialTracebackLexer, **options)
245 217
246 218 class IPythonConsoleLexer(Lexer):
247 219 """
248 220 An IPython console lexer for IPython code-blocks and doctests, such as:
249 221
250 222 .. code-block:: rst
251 223
252 224 .. code-block:: ipythonconsole
253 225
254 226 In [1]: a = 'foo'
255 227
256 228 In [2]: a
257 229 Out[2]: 'foo'
258 230
259 231 In [3]: print(a)
260 232 foo
261 233
262 234
263 235 Support is also provided for IPython exceptions:
264 236
265 237 .. code-block:: rst
266 238
267 239 .. code-block:: ipythonconsole
268 240
269 241 In [1]: raise Exception
270 242 Traceback (most recent call last):
271 243 ...
272 244 Exception
273 245
274 246 """
275 247 name = 'IPython console session'
276 248 aliases = ['ipythonconsole']
277 249 mimetypes = ['text/x-ipython-console']
278 250
279 251 # The regexps used to determine what is input and what is output.
280 252 # The default prompts for IPython are:
281 253 #
282 254 # in = 'In [#]: '
283 255 # continuation = ' .D.: '
284 256 # template = 'Out[#]: '
285 257 #
286 258 # Where '#' is the 'prompt number' or 'execution count', and 'D'
287 259 # is a number of dots matching the width of the execution count.
288 260 #
289 261 in1_regex = r'In \[[0-9]+\]: '
290 262 in2_regex = r' \.\.+\.: '
291 263 out_regex = r'Out\[[0-9]+\]: '
292 264
293 265 #: The regex to determine when a traceback starts.
294 266 ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')
295 267
296 268 def __init__(self, **options):
297 269 """Initialize the IPython console lexer.
298 270
299 271 Parameters
300 272 ----------
273 python3 : bool
274 If `True`, then the console inputs are parsed using a Python 3
275 lexer. Otherwise, they are parsed using a Python 2 lexer.
301 276 in1_regex : RegexObject
302 277 The compiled regular expression used to detect the start
303 278 of inputs. Although the IPython configuration setting may have a
304 279 trailing whitespace, do not include it in the regex. If `None`,
305 280 then the default input prompt is assumed.
306 281 in2_regex : RegexObject
307 282 The compiled regular expression used to detect the continuation
308 283 of inputs. Although the IPython configuration setting may have a
309 284 trailing whitespace, do not include it in the regex. If `None`,
310 285 then the default input prompt is assumed.
311 286 out_regex : RegexObject
312 287 The compiled regular expression used to detect outputs. If `None`,
313 288 then the default output prompt is assumed.
314 289
315 290 """
316 self.aliases = ["ipython3console"]
291 self.python3 = get_bool_opt(options, 'python3', False)
292 if self.python3:
293 self.aliases = ['ipython3console']
294 else:
295 self.aliases = ['ipython2console', 'ipythonconsole']
317 296
318 297 in1_regex = options.get('in1_regex', self.in1_regex)
319 298 in2_regex = options.get('in2_regex', self.in2_regex)
320 299 out_regex = options.get('out_regex', self.out_regex)
321 300
322 301 # So that we can work with input and output prompts which have been
323 302 # rstrip'd (possibly by editors) we also need rstrip'd variants. If
324 303 # we do not do this, then such prompts will be tagged as 'output'.
325 304 # The reason we can't just use the rstrip'd variants instead is because
326 305 # we want any whitespace associated with the prompt to be inserted
327 306 # with the token. This allows formatted code to be modified so as to hide
328 307 # the appearance of prompts, with the whitespace included. One example
329 308 # use of this is in copybutton.js from the standard lib Python docs.
330 309 in1_regex_rstrip = in1_regex.rstrip() + '\n'
331 310 in2_regex_rstrip = in2_regex.rstrip() + '\n'
332 311 out_regex_rstrip = out_regex.rstrip() + '\n'
333 312
334 313 # Compile and save them all.
335 314 attrs = ['in1_regex', 'in2_regex', 'out_regex',
336 315 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
337 316 for attr in attrs:
338 317 self.__setattr__(attr, re.compile(locals()[attr]))
339 318
340 319 Lexer.__init__(self, **options)
341 320
342 self.pylexer = IPython3Lexer(**options)
343 self.tblexer = IPythonTracebackLexer(**options)
321 if self.python3:
322 pylexer = IPython3Lexer
323 tblexer = IPythonTracebackLexer
324 else:
325 pylexer = IPythonLexer
326 tblexer = IPythonTracebackLexer
327
328 self.pylexer = pylexer(**options)
329 self.tblexer = tblexer(**options)
344 330
345 331 self.reset()
346 332
347 333 def reset(self):
348 334 self.mode = 'output'
349 335 self.index = 0
350 336 self.buffer = u''
351 337 self.insertions = []
352 338
353 339 def buffered_tokens(self):
354 340 """
355 341 Generator of unprocessed tokens after doing insertions and before
356 342 changing to a new state.
357 343
358 344 """
359 345 if self.mode == 'output':
360 346 tokens = [(0, Generic.Output, self.buffer)]
361 347 elif self.mode == 'input':
362 348 tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
363 349 else: # traceback
364 350 tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
365 351
366 352 for i, t, v in do_insertions(self.insertions, tokens):
367 353 # All token indexes are relative to the buffer.
368 354 yield self.index + i, t, v
369 355
370 356 # Clear it all
371 357 self.index += len(self.buffer)
372 358 self.buffer = u''
373 359 self.insertions = []
374 360
375 361 def get_mci(self, line):
376 362 """
377 363 Parses the line and returns a 3-tuple: (mode, code, insertion).
378 364
379 365 `mode` is the next mode (or state) of the lexer, and is always equal
380 366 to 'input', 'output', or 'tb'.
381 367
382 368 `code` is a portion of the line that should be added to the buffer
383 369 corresponding to the next mode and eventually lexed by another lexer.
384 370 For example, `code` could be Python code if `mode` were 'input'.
385 371
386 372 `insertion` is a 3-tuple (index, token, text) representing an
387 373 unprocessed "token" that will be inserted into the stream of tokens
388 374 that are created from the buffer once we change modes. This is usually
389 375 the input or output prompt.
390 376
391 377 In general, the next mode depends on current mode and on the contents
392 378 of `line`.
393 379
394 380 """
395 381 # To reduce the number of regex match checks, we have multiple
396 382 # 'if' blocks instead of 'if-elif' blocks.
397 383
398 384 # Check for possible end of input
399 385 in2_match = self.in2_regex.match(line)
400 386 in2_match_rstrip = self.in2_regex_rstrip.match(line)
401 387 if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
402 388 in2_match_rstrip:
403 389 end_input = True
404 390 else:
405 391 end_input = False
406 392 if end_input and self.mode != 'tb':
407 393 # Only look for an end of input when not in tb mode.
408 394 # An ellipsis could appear within the traceback.
409 395 mode = 'output'
410 396 code = u''
411 397 insertion = (0, Generic.Prompt, line)
412 398 return mode, code, insertion
413 399
414 400 # Check for output prompt
415 401 out_match = self.out_regex.match(line)
416 402 out_match_rstrip = self.out_regex_rstrip.match(line)
417 403 if out_match or out_match_rstrip:
418 404 mode = 'output'
419 405 if out_match:
420 406 idx = out_match.end()
421 407 else:
422 408 idx = out_match_rstrip.end()
423 409 code = line[idx:]
424 410 # Use the 'heading' token for output. We cannot use Generic.Error
425 411 # since it would conflict with exceptions.
426 412 insertion = (0, Generic.Heading, line[:idx])
427 413 return mode, code, insertion
428 414
429 415
430 416 # Check for input or continuation prompt (non stripped version)
431 417 in1_match = self.in1_regex.match(line)
432 418 if in1_match or (in2_match and self.mode != 'tb'):
433 419 # New input or when not in tb, continued input.
434 420 # We do not check for continued input when in tb since it is
435 421 # allowable to replace a long stack with an ellipsis.
436 422 mode = 'input'
437 423 if in1_match:
438 424 idx = in1_match.end()
439 425 else: # in2_match
440 426 idx = in2_match.end()
441 427 code = line[idx:]
442 428 insertion = (0, Generic.Prompt, line[:idx])
443 429 return mode, code, insertion
444 430
445 431 # Check for input or continuation prompt (stripped version)
446 432 in1_match_rstrip = self.in1_regex_rstrip.match(line)
447 433 if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
448 434 # New input or when not in tb, continued input.
449 435 # We do not check for continued input when in tb since it is
450 436 # allowable to replace a long stack with an ellipsis.
451 437 mode = 'input'
452 438 if in1_match_rstrip:
453 439 idx = in1_match_rstrip.end()
454 440 else: # in2_match
455 441 idx = in2_match_rstrip.end()
456 442 code = line[idx:]
457 443 insertion = (0, Generic.Prompt, line[:idx])
458 444 return mode, code, insertion
459 445
460 446 # Check for traceback
461 447 if self.ipytb_start.match(line):
462 448 mode = 'tb'
463 449 code = line
464 450 insertion = None
465 451 return mode, code, insertion
466 452
467 453 # All other stuff...
468 454 if self.mode in ('input', 'output'):
469 455 # We assume all other text is output. Multiline input that
470 456 # does not use the continuation marker cannot be detected.
471 457 # For example, the 3 in the following is clearly output:
472 458 #
473 459 # In [1]: print 3
474 460 # 3
475 461 #
476 462 # But the following second line is part of the input:
477 463 #
478 464 # In [2]: while True:
479 465 # print True
480 466 #
481 467 # In both cases, the 2nd line will be 'output'.
482 468 #
483 469 mode = 'output'
484 470 else:
485 471 mode = 'tb'
486 472
487 473 code = line
488 474 insertion = None
489 475
490 476 return mode, code, insertion
491 477
492 478 def get_tokens_unprocessed(self, text):
493 479 self.reset()
494 480 for match in line_re.finditer(text):
495 481 line = match.group()
496 482 mode, code, insertion = self.get_mci(line)
497 483
498 484 if mode != self.mode:
499 485 # Yield buffered tokens before transitioning to new mode.
500 486 for token in self.buffered_tokens():
501 487 yield token
502 488 self.mode = mode
503 489
504 490 if insertion:
505 491 self.insertions.append((len(self.buffer), [insertion]))
506 492 self.buffer += code
507 493
508 494 for token in self.buffered_tokens():
509 495 yield token
510 496
511 497 class IPyLexer(Lexer):
512 498 r"""
513 499 Primary lexer for all IPython-like code.
514 500
515 501 This is a simple helper lexer. If the first line of the text begins with
516 502 "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
517 503 lexer. If not, then the entire text is parsed with an IPython lexer.
518 504
519 505 The goal is to reduce the number of lexers that are registered
520 506 with Pygments.
521 507
522 508 """
523 509 name = 'IPy session'
524 510 aliases = ['ipy']
525 511
526 512 def __init__(self, **options):
527 513 """
528 514 Create a new IPyLexer instance which dispatches to either an
529 IPythonConsoleLexer (if In prompts are present) or an IPython3Lexer (if
515 IPythonConsoleLexer (if In prompts are present) or an IPythonLexer (if
530 516 In prompts are not present).
531 517 """
532 518 # init docstring is necessary for docs not to fail to build due to parent
533 519 # docs referencing a section in pygments docs.
534 self.aliases = ["ipy3"]
520 self.python3 = get_bool_opt(options, 'python3', False)
521 if self.python3:
522 self.aliases = ['ipy3']
523 else:
524 self.aliases = ['ipy2', 'ipy']
535 525
536 526 Lexer.__init__(self, **options)
537 527
538 self.IPythonLexer = IPython3Lexer(**options)
528 self.IPythonLexer = IPythonLexer(**options)
539 529 self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
540 530
541 531 def get_tokens_unprocessed(self, text):
542 532 # Search for the input prompt anywhere...this allows code blocks to
543 533 # begin with comments as well.
544 534 if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
545 535 lex = self.IPythonConsoleLexer
546 536 else:
547 537 lex = self.IPythonLexer
548 538 for token in lex.get_tokens_unprocessed(text):
549 539 yield token
540
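As a usage note on the hunk above: a minimal sketch of how the two dynamically built lexers restored by this revert can be driven through Pygments (assuming IPython and Pygments are installed; exact token output varies across Pygments versions):

.. code-block:: python

    # Minimal sketch, not part of the diff: exercising the rebuilt lexers.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    from IPython.lib.lexers import IPython3Lexer, IPythonLexer

    code = "!echo $HOME\n%timeit pass\n"

    # The Python-2-based variant restored by build_ipy_lexer(python3=False)...
    print(highlight(code, IPythonLexer(), TerminalFormatter()))
    # ...and the Python-3-based variant.
    print(highlight(code, IPython3Lexer(), TerminalFormatter()))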
@@ -1,184 +1,184
1 1 """Test lexers module"""
2 2
3 3 # Copyright (c) IPython Development Team.
4 4 # Distributed under the terms of the Modified BSD License.
5 5
6 6 from unittest import TestCase
7 7 from pygments import __version__ as pygments_version
8 8 from pygments.token import Token
9 9 from pygments.lexers import BashLexer
10 10
11 11 from .. import lexers
12 12
13 13 pyg214 = tuple(int(x) for x in pygments_version.split(".")[:2]) >= (2, 14)
14 14
15 15
16 16 class TestLexers(TestCase):
17 17 """Collection of lexers tests"""
18 18 def setUp(self):
19 self.lexer = lexers.IPython3Lexer()
19 self.lexer = lexers.IPythonLexer()
20 20 self.bash_lexer = BashLexer()
21 21
22 def testIPython3Lexer(self):
22 def testIPythonLexer(self):
23 23 fragment = '!echo $HOME\n'
24 24 bash_tokens = [
25 25 (Token.Operator, '!'),
26 26 ]
27 27 bash_tokens.extend(self.bash_lexer.get_tokens(fragment[1:]))
28 28 ipylex_token = list(self.lexer.get_tokens(fragment))
29 29 assert bash_tokens[:-1] == ipylex_token[:-1]
30 30
31 31 fragment_2 = "!" + fragment
32 32 tokens_2 = [
33 33 (Token.Operator, '!!'),
34 34 ] + bash_tokens[1:]
35 35 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
36 36
37 37 fragment_2 = '\t %%!\n' + fragment[1:]
38 38 tokens_2 = [
39 39 (Token.Text, '\t '),
40 40 (Token.Operator, '%%!'),
41 41 (Token.Text, '\n'),
42 42 ] + bash_tokens[1:]
43 43 assert tokens_2 == list(self.lexer.get_tokens(fragment_2))
44 44
45 45 fragment_2 = 'x = ' + fragment
46 46 tokens_2 = [
47 47 (Token.Name, 'x'),
48 48 (Token.Text, ' '),
49 49 (Token.Operator, '='),
50 50 (Token.Text, ' '),
51 51 ] + bash_tokens
52 52 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
53 53
54 54 fragment_2 = 'x, = ' + fragment
55 55 tokens_2 = [
56 56 (Token.Name, 'x'),
57 57 (Token.Punctuation, ','),
58 58 (Token.Text, ' '),
59 59 (Token.Operator, '='),
60 60 (Token.Text, ' '),
61 61 ] + bash_tokens
62 62 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
63 63
64 64 fragment_2 = 'x, = %sx ' + fragment[1:]
65 65 tokens_2 = [
66 66 (Token.Name, 'x'),
67 67 (Token.Punctuation, ','),
68 68 (Token.Text, ' '),
69 69 (Token.Operator, '='),
70 70 (Token.Text, ' '),
71 71 (Token.Operator, '%'),
72 72 (Token.Keyword, 'sx'),
73 73 (Token.Text, ' '),
74 74 ] + bash_tokens[1:]
75 75 if tokens_2[7] == (Token.Text, " ") and pyg214: # pygments 2.14+
76 76 tokens_2[7] = (Token.Text.Whitespace, " ")
77 77 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
78 78
79 79 fragment_2 = 'f = %R function () {}\n'
80 80 tokens_2 = [
81 81 (Token.Name, 'f'),
82 82 (Token.Text, ' '),
83 83 (Token.Operator, '='),
84 84 (Token.Text, ' '),
85 85 (Token.Operator, '%'),
86 86 (Token.Keyword, 'R'),
87 87 (Token.Text, ' function () {}\n'),
88 88 ]
89 89 assert tokens_2 == list(self.lexer.get_tokens(fragment_2))
90 90
91 91 fragment_2 = '\t%%xyz\n$foo\n'
92 92 tokens_2 = [
93 93 (Token.Text, '\t'),
94 94 (Token.Operator, '%%'),
95 95 (Token.Keyword, 'xyz'),
96 96 (Token.Text, '\n$foo\n'),
97 97 ]
98 98 assert tokens_2 == list(self.lexer.get_tokens(fragment_2))
99 99
100 100 fragment_2 = '%system?\n'
101 101 tokens_2 = [
102 102 (Token.Operator, '%'),
103 103 (Token.Keyword, 'system'),
104 104 (Token.Operator, '?'),
105 105 (Token.Text, '\n'),
106 106 ]
107 107 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
108 108
109 109 fragment_2 = 'x != y\n'
110 110 tokens_2 = [
111 111 (Token.Name, 'x'),
112 112 (Token.Text, ' '),
113 113 (Token.Operator, '!='),
114 114 (Token.Text, ' '),
115 115 (Token.Name, 'y'),
116 116 (Token.Text, '\n'),
117 117 ]
118 118 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
119 119
120 120 fragment_2 = ' ?math.sin\n'
121 121 tokens_2 = [
122 122 (Token.Text, ' '),
123 123 (Token.Operator, '?'),
124 124 (Token.Text, 'math.sin'),
125 125 (Token.Text, '\n'),
126 126 ]
127 127 assert tokens_2[:-1] == list(self.lexer.get_tokens(fragment_2))[:-1]
128 128
129 129 fragment = ' *int*?\n'
130 130 tokens = [
131 131 (Token.Text, ' *int*'),
132 132 (Token.Operator, '?'),
133 133 (Token.Text, '\n'),
134 134 ]
135 135 assert tokens == list(self.lexer.get_tokens(fragment))
136 136
137 137 fragment = '%%writefile -a foo.py\nif a == b:\n pass'
138 138 tokens = [
139 139 (Token.Operator, '%%writefile'),
140 140 (Token.Text, ' -a foo.py\n'),
141 141 (Token.Keyword, 'if'),
142 142 (Token.Text, ' '),
143 143 (Token.Name, 'a'),
144 144 (Token.Text, ' '),
145 145 (Token.Operator, '=='),
146 146 (Token.Text, ' '),
147 147 (Token.Name, 'b'),
148 148 (Token.Punctuation, ':'),
149 149 (Token.Text, '\n'),
150 150 (Token.Text, ' '),
151 151 (Token.Keyword, 'pass'),
152 152 (Token.Text, '\n'),
153 153 ]
154 154 if tokens[10] == (Token.Text, "\n") and pyg214: # pygments 2.14+
155 155 tokens[10] = (Token.Text.Whitespace, "\n")
156 156 assert tokens[:-1] == list(self.lexer.get_tokens(fragment))[:-1]
157 157
158 158 fragment = '%%timeit\nmath.sin(0)'
159 159 tokens = [
160 160 (Token.Operator, '%%timeit\n'),
161 161 (Token.Name, 'math'),
162 162 (Token.Operator, '.'),
163 163 (Token.Name, 'sin'),
164 164 (Token.Punctuation, '('),
165 165 (Token.Literal.Number.Integer, '0'),
166 166 (Token.Punctuation, ')'),
167 167 (Token.Text, '\n'),
168 168 ]
169 169
170 170 fragment = '%%HTML\n<div>foo</div>'
171 171 tokens = [
172 172 (Token.Operator, '%%HTML'),
173 173 (Token.Text, '\n'),
174 174 (Token.Punctuation, '<'),
175 175 (Token.Name.Tag, 'div'),
176 176 (Token.Punctuation, '>'),
177 177 (Token.Text, 'foo'),
178 178 (Token.Punctuation, '<'),
179 179 (Token.Punctuation, '/'),
180 180 (Token.Name.Tag, 'div'),
181 181 (Token.Punctuation, '>'),
182 182 (Token.Text, '\n'),
183 183 ]
184 184 assert tokens == list(self.lexer.get_tokens(fragment))
@@ -1,24 +1,26
1 1 from typing import List
2 2
3 3 import pytest
4 4 import pygments.lexers
5 5 import pygments.lexer
6 6
7 from IPython.lib.lexers import IPythonConsoleLexer, IPython3Lexer
7 from IPython.lib.lexers import IPythonConsoleLexer, IPythonLexer, IPython3Lexer
8 8
9 9 #: the human-readable names of the IPython lexers with ``entry_points``
10 EXPECTED_LEXER_NAMES = [cls.name for cls in [IPythonConsoleLexer, IPython3Lexer]]
10 EXPECTED_LEXER_NAMES = [
11 cls.name for cls in [IPythonConsoleLexer, IPythonLexer, IPython3Lexer]
12 ]
11 13
12 14
13 15 @pytest.fixture
14 16 def all_pygments_lexer_names() -> List[str]:
15 17 """Get all lexer names registered in pygments."""
16 18 return {l[0] for l in pygments.lexers.get_all_lexers()}
17 19
18 20
19 21 @pytest.mark.parametrize("expected_lexer", EXPECTED_LEXER_NAMES)
20 22 def test_pygments_entry_points(
21 23 expected_lexer: str, all_pygments_lexer_names: List[str]
22 24 ) -> None:
23 25 """Check whether the ``entry_points`` for ``pygments.lexers`` are correct."""
24 26 assert expected_lexer in all_pygments_lexer_names
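The entry-point test above can also be checked by hand; a hedged sketch, assuming the installed IPython carries the entry points from this revert:

.. code-block:: python

    # Sketch: resolve the restored aliases through Pygments' plugin registry.
    from pygments.lexers import get_lexer_by_name

    for alias in ("ipython", "ipython2", "ipython3", "ipythonconsole"):
        print(alias, "->", get_lexer_by_name(alias).name)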
@@ -1,24 +1,28
1 1 """
2 2 reST directive for syntax-highlighting ipython interactive sessions.
3 3
4 4 """
5 5
6 6 from sphinx import highlighting
7 7 from IPython.lib.lexers import IPyLexer
8 8
9 9 def setup(app):
10 10 """Setup as a sphinx extension."""
11 11
12 12 # This is only a lexer, so adding it below to pygments appears sufficient.
13 13 # But if somebody knows what the right API usage should be to do that via
14 14 # sphinx, by all means fix it here. At least having this setup.py
15 15 # suppresses the sphinx warning we'd get without it.
16 16 metadata = {'parallel_read_safe': True, 'parallel_write_safe': True}
17 17 return metadata
18 18
19 19 # Register the extension as a valid pygments lexer.
20 20 # Alternatively, we could register the lexer with pygments instead. This would
21 21 # require using setuptools entrypoints: http://pygments.org/docs/plugins
22 22
23 highlighting.lexers["ipython"] = IPyLexer()
24 highlighting.lexers["ipython3"] = IPyLexer()
23 ipy2 = IPyLexer(python3=False)
24 ipy3 = IPyLexer(python3=True)
25
26 highlighting.lexers['ipython'] = ipy2
27 highlighting.lexers['ipython2'] = ipy2
28 highlighting.lexers['ipython3'] = ipy3
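For reference, a hypothetical ``conf.py`` fragment showing how a Sphinx project would enable the extension defined in this file (the module path is assumed from IPython's layout, not shown in the diff):

.. code-block:: python

    # Hypothetical Sphinx conf.py snippet; the setup() above registers the
    # IPyLexer instances under 'ipython', 'ipython2', and 'ipython3'.
    extensions = [
        "IPython.sphinxext.ipython_console_highlighting",
    ]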
@@ -1,62 +1,64
1 1 .. _console_lexer:
2 2
3 3 New IPython Console Lexer
4 4 -------------------------
5 5
6 6 .. versionadded:: 2.0.0
7 7
8 8 The IPython console lexer has been rewritten and now supports tracebacks
9 9 and customized input/output prompts. An entire suite of lexers is now
10 10 available at :mod:`IPython.lib.lexers`. These include:
11 11
12 IPython3Lexer
13 Lexer for pure IPython (python 3 + magic/shell commands)
12 IPythonLexer & IPython3Lexer
13 Lexers for pure IPython (python + magic/shell commands)
14 14
15 15 IPythonPartialTracebackLexer & IPythonTracebackLexer
16 The partial traceback lexer reads everything but the Python code
17 appearing in a traceback. The full lexer combines the partial lexer
18 with the IPython3Lexer.
16 Supports 2.x and 3.x via the keyword `python3`. The partial traceback
17 lexer reads everything but the Python code appearing in a traceback.
18 The full lexer combines the partial lexer with an IPython lexer.
19 19
20 20 IPythonConsoleLexer
21 A lexer for python 3 IPython console sessions, with support for tracebacks.
21 A lexer for IPython console sessions, with support for tracebacks.
22 Supports 2.x and 3.x via the keyword `python3`.
22 23
23 24 IPyLexer
24 25 A friendly lexer which examines the first line of text and from it,
25 26 decides whether to use an IPython lexer or an IPython console lexer.
27 Supports 2.x and 3.x via the keyword `python3`.
26 28
27 29 Previously, the :class:`IPythonConsoleLexer` class was available at
28 30 :mod:`IPython.sphinxext.ipython_console_highlighting`. It was inserted
29 31 into Pygments' list of available lexers under the name `ipython`. It should
30 32 be mentioned that this name is inaccurate, since an IPython console session
31 33 is not the same as IPython code (which itself is a superset of the Python
32 34 language).
33 35
34 36 Now, the Sphinx extension inserts two console lexers into Pygments' list of
35 37 available lexers. Both are IPyLexer instances under the names: `ipython` and
36 38 `ipython3`. Although the names can be confusing (as mentioned above), their
37 39 continued use is, in part, to maintain backwards compatibility and to
38 40 aid typical usage. If a project needs to make Pygments aware of more than just
39 41 the IPyLexer class, then one should not make the IPyLexer class available under
40 42 the name `ipython`, but rather under `ipy` or some other non-conflicting value.
41 43
42 44 Code blocks such as:
43 45
44 46 .. code-block:: rst
45 47
46 48 .. code-block:: ipython
47 49
48 50 In [1]: 2**2
49 51 Out[1]: 4
50 52
51 53 will continue to work as before, but now, they will also properly highlight
52 54 tracebacks. For pure IPython code, the same lexer will also work:
53 55
54 56 .. code-block:: rst
55 57
56 58 .. code-block:: ipython
57 59
58 60 x = ''.join(map(str, range(10)))
59 61 !echo $x
60 62
61 63 Since the first line of the block did not begin with a standard IPython console
62 64 prompt, the entire block is assumed to consist of IPython code instead.
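The first-line dispatch described above can be sketched as follows (a sketch assuming the reverted ``IPyLexer`` with its ``python3`` option; only the first few tokens are printed):

.. code-block:: python

    # Sketch of IPyLexer's first-line dispatch, assuming IPython is importable.
    from IPython.lib.lexers import IPyLexer

    lexer = IPyLexer(python3=True)

    console_text = "In [1]: 2**2\nOut[1]: 4\n"
    pure_text = "x = ''.join(map(str, range(10)))\n!echo $x\n"

    # console_text matches the 'In [n]:' prompt, so the console lexer runs;
    # pure_text does not, so the plain IPython lexer handles it instead.
    for text in (console_text, pure_text):
        print(list(lexer.get_tokens(text))[:3])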
@@ -1,157 +1,158
1 1 # -*- coding: utf-8 -*-
2 2 """Setup script for IPython.
3 3
4 4 Under Posix environments it works like a typical setup.py script.
5 5 Under Windows, the command sdist is not supported, since IPython
6 6 requires utilities which are not available under Windows."""
7 7
8 8 #-----------------------------------------------------------------------------
9 9 # Copyright (c) 2008-2011, IPython Development Team.
10 10 # Copyright (c) 2001-2007, Fernando Perez <fernando.perez@colorado.edu>
11 11 # Copyright (c) 2001, Janko Hauser <jhauser@zscout.de>
12 12 # Copyright (c) 2001, Nathaniel Gray <n8gray@caltech.edu>
13 13 #
14 14 # Distributed under the terms of the Modified BSD License.
15 15 #
16 16 # The full license is in the file COPYING.rst, distributed with this software.
17 17 #-----------------------------------------------------------------------------
18 18
19 19 import os
20 20 import sys
21 21
22 22 # **Python version check**
23 23 #
24 24 # This check is also made in IPython/__init__, don't forget to update both when
25 25 # changing Python version requirements.
26 26 if sys.version_info < (3, 9):
27 27 pip_message = 'This may be due to an out of date pip. Make sure you have pip >= 9.0.1.'
28 28 try:
29 29 import pip
30 30 pip_version = tuple([int(x) for x in pip.__version__.split('.')[:3]])
31 31 if pip_version < (9, 0, 1) :
32 32 pip_message = 'Your pip version is out of date, please install pip >= 9.0.1. '\
33 33 'pip {} detected.'.format(pip.__version__)
34 34 else:
35 35 # pip is new enough - it must be something else
36 36 pip_message = ''
37 37 except Exception:
38 38 pass
39 39
40 40
41 41 error = """
42 42 IPython 8.13+ supports Python 3.9 and above, following NEP 29.
43 43 IPython 8.0-8.12 supports Python 3.8 and above, following NEP 29.
44 44 When using Python 2.7, please install IPython 5.x LTS Long Term Support version.
45 45 Python 3.3 and 3.4 were supported up to IPython 6.x.
46 46 Python 3.5 was supported with IPython 7.0 to 7.9.
47 47 Python 3.6 was supported with IPython up to 7.16.
48 48 Python 3.7 was still supported with the 7.x branch.
49 49
50 50 See IPython `README.rst` file for more information:
51 51
52 52 https://github.com/ipython/ipython/blob/main/README.rst
53 53
54 54 Python {py} detected.
55 55 {pip}
56 56 """.format(
57 57 py=sys.version_info, pip=pip_message
58 58 )
59 59
60 60 print(error, file=sys.stderr)
61 61 sys.exit(1)
62 62
63 63 # At least we're on the python version we need, move on.
64 64
65 65 from setuptools import setup
66 66
67 67 # Our own imports
68 68 sys.path.insert(0, ".")
69 69
70 70 from setupbase import target_update, find_entry_points
71 71
72 72 from setupbase import (
73 73 setup_args,
74 74 check_package_data_first,
75 75 find_data_files,
76 76 git_prebuild,
77 77 install_symlinked,
78 78 install_lib_symlink,
79 79 install_scripts_for_symlink,
80 80 unsymlink,
81 81 )
82 82
83 83 #-------------------------------------------------------------------------------
84 84 # Handle OS specific things
85 85 #-------------------------------------------------------------------------------
86 86
87 87 if os.name in ('nt','dos'):
88 88 os_name = 'windows'
89 89 else:
90 90 os_name = os.name
91 91
92 92 # Under Windows, 'sdist' has not been supported. Now that the docs build with
93 93 # Sphinx it might work, but let's not turn it on until someone confirms that it
94 94 # actually works.
95 95 if os_name == 'windows' and 'sdist' in sys.argv:
96 96 print('The sdist command is not available under Windows. Exiting.')
97 97 sys.exit(1)
98 98
99 99
100 100 #-------------------------------------------------------------------------------
101 101 # Things related to the IPython documentation
102 102 #-------------------------------------------------------------------------------
103 103
104 104 # update the manuals when building a source dist
105 105 if len(sys.argv) >= 2 and sys.argv[1] in ('sdist','bdist_rpm'):
106 106
107 107 # List of things to be updated. Each entry is a triplet of args for
108 108 # target_update()
109 109 to_update = [
110 110 (
111 111 "docs/man/ipython.1.gz",
112 112 ["docs/man/ipython.1"],
113 113 "cd docs/man && python -m gzip --best ipython.1",
114 114 ),
115 115 ]
116 116
117 117
118 118 [ target_update(*t) for t in to_update ]
119 119
120 120 #---------------------------------------------------------------------------
121 121 # Find all the packages, package data, and data_files
122 122 #---------------------------------------------------------------------------
123 123
124 124 data_files = find_data_files()
125 125
126 126 setup_args['data_files'] = data_files
127 127
128 128 #---------------------------------------------------------------------------
129 129 # custom distutils commands
130 130 #---------------------------------------------------------------------------
131 131 # imports here, so they are after setuptools import if there was one
132 132 from setuptools.command.sdist import sdist
133 133
134 134 setup_args['cmdclass'] = {
135 135 'build_py': \
136 136 check_package_data_first(git_prebuild('IPython')),
137 137 'sdist' : git_prebuild('IPython', sdist),
138 138 'symlink': install_symlinked,
139 139 'install_lib_symlink': install_lib_symlink,
140 140 'install_scripts_sym': install_scripts_for_symlink,
141 141 'unsymlink': unsymlink,
142 142 }
143 143
144 144 setup_args["entry_points"] = {
145 145 "console_scripts": find_entry_points(),
146 146 "pygments.lexers": [
147 147 "ipythonconsole = IPython.lib.lexers:IPythonConsoleLexer",
148 "ipython = IPython.lib.lexers:IPythonLexer",
148 149 "ipython3 = IPython.lib.lexers:IPython3Lexer",
149 150 ],
150 151 }
151 152
152 153 #---------------------------------------------------------------------------
153 154 # Do the actual setup now
154 155 #---------------------------------------------------------------------------
155 156
156 157 if __name__ == "__main__":
157 158 setup(**setup_args)