Style fix for some comments.
chebee7i
@@ -1,507 +1,501 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer
    IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer
    IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and, from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
    (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                        using(BashLexer), Text)),
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
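# For illustration (these examples are not in the original source): given
# "%alias largs ls -l\n", the first rule tags "%" as Operator and "alias"
# as Keyword, then hands "largs ls -l" to BashLexer; a bare "%pylab\n"
# falls through to the second rule; and "!ls -l\n" matches the third, with
# everything after the "!" lexed as shell code.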

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython-specific keywords (e.g. magic commands,
    shell commands).

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
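# A minimal usage sketch (illustrative, not part of the original module;
# it assumes only that Pygments is installed):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     html = highlight("!ls -l\n%matplotlib inline\n",
#                      IPythonLexer(), HtmlFormatter())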


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
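    # For illustration (not in the original source): given the traceback
    # line "----> 1 raise Exception", the emphasized-line rule tags
    # "----> " as Name.Exception and "1" as Literal.Number.Integer, leaving
    # " raise Exception\n" as Other for a delegating lexer to re-lex as
    # IPython code.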


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipythontb3']
        else:
            self.aliases = ['ipythontb2', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

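# A sketch of direct use (illustrative; tb_text is a hypothetical string
# holding a pasted IPython traceback):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     html = highlight(tb_text, IPythonTracebackLexer(python3=True),
#                      HtmlFormatter())
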
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: raise Exception
            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythoncon']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #    c.PromptManager.in_template  = 'In [\#]: '
    #    c.PromptManager.in2_template = '   .\D.: '
    #    c.PromptManager.out_template = 'Out[\#]: '
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
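    # For illustration (not in the original source): in1_regex matches
    # "In [1]: ", out_regex matches "Out[12]: ", and in2_regex matches an
    # aligned continuation prompt such as "   ...: ".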

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default continuation prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipythoncon3']
        else:
            self.aliases = ['ipythoncon2', 'ipythoncon']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []
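
    # For illustration (not in the original source): with the buffer
    # "x = 1\n" in 'input' mode and insertions
    # [(0, [(0, Generic.Prompt, u'In [1]: ')])], do_insertions interleaves
    # the prompt token ahead of the lexed Python tokens, so the rendered
    # line reads "In [1]: x = 1".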

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            for token in self.buffered_tokens():
                yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        if re.match(r'(In \[[0-9]+\]:)', text.strip()):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
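
# A minimal demonstration sketch (not part of the original module; it
# assumes only that Pygments is installed). IPyLexer dispatches on the
# first line: prompted text goes to the console lexer, anything else to
# the plain IPython lexer.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    console_session = (
        u"In [1]: a = 'foo'\n"
        u"\n"
        u"In [2]: a\n"
        u"Out[2]: 'foo'\n"
    )
    # The leading "In [1]:" routes this text through IPythonConsoleLexer.
    print(highlight(console_session, IPyLexer(), TerminalFormatter()))

    # Without a prompt, the same lexer falls back to IPythonLexer.
    print(highlight(u"%timeit pass\n", IPyLexer(), TerminalFormatter()))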