##// END OF EJS Templates
Improve detection of IPython console sessions.
chebee7i -
Show More
@@ -1,500 +1,502 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Defines a variety of Pygments lexers for highlighting IPython code.
3 Defines a variety of Pygments lexers for highlighting IPython code.
4
4
5 This includes:
5 This includes:
6
6
7 IPythonLexer, IPython3Lexer
7 IPythonLexer, IPython3Lexer
8 Lexers for pure IPython (python + magic/shell commands)
8 Lexers for pure IPython (python + magic/shell commands)
9
9
10 IPythonPartialTracebackLexer, IPythonTracebackLexer
10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 lexer reads everything but the Python code appearing in a traceback.
12 lexer reads everything but the Python code appearing in a traceback.
13 The full lexer combines the partial lexer with an IPython lexer.
13 The full lexer combines the partial lexer with an IPython lexer.
14
14
15 IPythonConsoleLexer
15 IPythonConsoleLexer
16 A lexer for IPython console sessions, with support for tracebacks.
16 A lexer for IPython console sessions, with support for tracebacks.
17
17
18 IPyLexer
18 IPyLexer
19 A friendly lexer which examines the first line of text and from it,
19 A friendly lexer which examines the first line of text and from it,
20 decides whether to use an IPython lexer or an IPython console lexer.
20 decides whether to use an IPython lexer or an IPython console lexer.
21 This is probably the only lexer that needs to be explicitly added
21 This is probably the only lexer that needs to be explicitly added
22 to Pygments.
22 to Pygments.
23
23
24 """
24 """
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Copyright (c) 2013, the IPython Development Team.
26 # Copyright (c) 2013, the IPython Development Team.
27 #
27 #
28 # Distributed under the terms of the Modified BSD License.
28 # Distributed under the terms of the Modified BSD License.
29 #
29 #
30 # The full license is in the file COPYING.txt, distributed with this software.
30 # The full license is in the file COPYING.txt, distributed with this software.
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32
32
33 # Standard library
33 # Standard library
34 import re
34 import re
35
35
36 # Third party
36 # Third party
37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
38 from pygments.lexer import (
38 from pygments.lexer import (
39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
40 )
40 )
41 from pygments.token import (
41 from pygments.token import (
42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
43 )
43 )
44 from pygments.util import get_bool_opt
44 from pygments.util import get_bool_opt
45
45
46 # Local
46 # Local
47 from IPython.testing.skipdoctest import skip_doctest
47 from IPython.testing.skipdoctest import skip_doctest
48
48
# Matches one line at a time, keeping the trailing newline; used by
# IPythonConsoleLexer.get_tokens_unprocessed to walk the text line by line.
line_re = re.compile('.*?\n')
50
50
# Extra token rules layered on top of the stock Python lexers so that
# IPython-specific syntax (magic commands and shell escapes) is highlighted.
ipython_tokens = [
    # Line magic with arguments: ``%magic args``.  The argument text is
    # delegated to the Bash lexer, a reasonable approximation for most
    # magics.  FIX: the original pattern captured ``(\.*)`` -- zero or more
    # literal dots -- so real arguments never matched this rule; ``(.*)``
    # captures the actual argument text up to the newline.
    (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                        using(BashLexer), Text)),
    # Bare magic with no arguments: ``%magic``.
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    # Shell escape at the start of a line: ``!command``.
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
57
57
def build_ipy_lexer(python3):
    """Build and return an IPython lexer class.

    The generated class inherits from the appropriate Pygments Python lexer
    and prepends token rules for IPython-specific syntax (magic commands,
    shell escapes, etc.).

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer;
        otherwise, build it from a Python 2 lexer.

    Returns
    -------
    type
        A newly created lexer class.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    # (The original code also assigned an unused `clsname` local in each
    # branch; the class name actually used is `name`.)
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    # Shallow-copy the parent's token table and prepend the IPython rules
    # so they take precedence over the stock Python 'root' rules.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)
94
94
95
95
# Concrete lexer classes produced by the factory above; these are the
# classes the traceback/console lexers below delegate to.
IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
98
98
99
99
class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    Anything tagged as `Other` (i.e. the Python code inside the traceback)
    is meant to be re-lexed later by a delegating lexer.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^( File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
147
147
148
148
class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception to the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # This DelegatingLexer first parses the text with the partial IPython
    # traceback lexer; every span that lexer tags as "Other" is then handed
    # to the "root" lexer, which is one of the two IPython lexers (the
    # boolean `python3` option selects which).
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
            root_lexer = IPython3Lexer
        else:
            self.aliases = ['ipython2tb', 'ipythontb']
            root_lexer = IPythonLexer

        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)
182
182
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    # c.PromptManager.in_template = 'In [\#]: '
    # c.PromptManager.in2_template = ' .\D.: '
    # c.PromptManager.out_template = 'Out[\#]: '
    #
    # NOTE(review): this copy of the file appears whitespace-collapsed;
    # upstream the continuation prompt pattern carries three leading
    # spaces (r'   \.\.+\.: ') -- confirm against upstream before relying
    # on the exact literals below.
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r' \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    #: Mirrors the first two rules of IPythonPartialTracebackLexer.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason can't just use the rstrip'd variants instead is because
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as hide
        # the appearance of prompts, with the whitespace included. One example
        # use of this is in copybutton.js from the standard lib Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        # Both branches use the same traceback lexer; only the Python
        # code lexer differs between Python 2 and 3.
        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        """Reset the per-lex state: mode, buffer, and pending insertions."""
        # mode: one of 'input', 'output', 'tb'; we start in 'output'
        self.mode = 'output'
        # index: absolute offset of the start of `buffer` within the text
        self.index = 0
        # buffer: accumulated lines for the current mode, not yet lexed
        self.buffer = u''
        # insertions: (offset, [token]) prompt tokens to splice back in
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        # Pick the sub-lexer appropriate to the mode the buffer was
        # accumulated under.
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            # In [1]: print 3
            # 3
            #
            # But the following second line is part of the input:
            #
            # In [2]: while True:
            # print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        """Lex `text` line by line, switching sub-lexers as the mode changes."""
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            # The for-else always runs (there is no break): flush whatever
            # is left in the buffer at end of text.
            for token in self.buffered_tokens():
                yield token
465
465
class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Look for an input prompt anywhere in the text -- this allows code
        # blocks to begin with comments (or other output) and still be
        # recognized as console sessions.  ``re.search`` states "appears
        # anywhere" directly; it is equivalent to the
        # ``re.match(r'.*(In \[[0-9]+\]:)', ..., re.DOTALL)`` form but
        # avoids the redundant greedy ``.*`` scan.
        if re.search(r'In \[[0-9]+\]:', text.strip()):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
500
502
General Comments 0
You need to be logged in to leave comments. Login now