Merge pull request #4891 from takluyver/docs-lexers-warnings...
Min RK - r14819:837fa285 merge
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

IPythonLexer, IPython3Lexer
    Lexers for pure IPython (python + magic/shell commands)

IPythonPartialTracebackLexer, IPythonTracebackLexer
    Supports 2.x and 3.x via keyword `python3`. The partial traceback
    lexer reads everything but the Python code appearing in a traceback.
    The full lexer combines the partial lexer with an IPython lexer.

IPythonConsoleLexer
    A lexer for IPython console sessions, with support for tracebacks.

IPyLexer
    A friendly lexer which examines the first line of text and, from it,
    decides whether to use an IPython lexer or an IPython console lexer.
    This is probably the only lexer that needs to be explicitly added
    to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
    (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                        using(BashLexer), Text)),
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]

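# Examples of lines each rule above is intended to catch (illustrative
# inputs, not from the test suite):
#
#     %alias d ls -F    <- rule 1: magic with arguments; the args go to bash
#     %pylab            <- rule 2: bare magic command
#     !ls -l            <- rule 3: shell escape; the command goes to bash
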
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    # `clsname` is the Python class name; `name` remains the human-readable
    # lexer name reported to Pygments.
    return type(clsname, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
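
# A short usage sketch (illustrative; `highlight` and `TerminalFormatter`
# are standard Pygments APIs):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     code = "%timeit f()\n!ls -l\n"
#     print(highlight(code, IPythonLexer(), TerminalFormatter()))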


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # just as we do for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
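
    # To illustrate, in an abridged IPython traceback the rules above fire
    # roughly as follows (annotations are illustrative):
    #
    #     ----------------------------------------------------------  <- traceback start
    #     ZeroDivisionError    Traceback (most recent call last)      <- exception + message
    #     <ipython-input-4-...> in <module>()                         <- module and callee
    #     ----> 1 1/0                                                 <- emphasized line
    #     ZeroDivisionError: integer division or modulo by zero       <- exception: message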


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
            IPyLexer = IPython3Lexer
        else:
            self.aliases = ['ipython2tb', 'ipythontb']
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

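# A short usage sketch (illustrative; `highlight` and `HtmlFormatter` are
# standard Pygments APIs, and `tb_text` stands in for the text of a captured
# traceback):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     html = highlight(tb_text, IPythonTracebackLexer(python3=True),
#                      HtmlFormatter())
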
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            setattr(self, attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
        else:
            pylexer = IPythonLexer
        # The same traceback lexer class handles both cases; it reads
        # `python3` from the options passed below.
        tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match_rstrip
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #                print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

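    # A worked example of `get_mci` (illustrative): feeding these lines in
    # order, starting from the initial 'output' mode, yields:
    #
    #     "In [1]: x = 1\n"  ->  ('input', 'x = 1\n',
    #                             (0, Generic.Prompt, 'In [1]: '))
    #     "Out[1]: 1\n"      ->  ('output', '1\n',
    #                             (0, Generic.Heading, 'Out[1]: '))
    #     "some text\n"      ->  ('output', 'some text\n', None)
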
    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            # The for-else runs after all lines have been processed; flush
            # any tokens still sitting in the buffer.
            for token in self.buffered_tokens():
                yield token


class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        if re.match(r'(In \[[0-9]+\]:)', text.strip()):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

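
if __name__ == '__main__':
    # A minimal smoke test (illustrative only; assumes nothing beyond the
    # classes defined above and standard Pygments behavior): lex a tiny
    # console session and print the raw token stream.
    sample = u"In [1]: a = 'foo'\nOut[1]: 'foo'\n"
    for index, token, value in IPyLexer().get_tokens_unprocessed(sample):
        print('%d %s %r' % (index, token, value))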