##// END OF EJS Templates
More updates to comments and docstrings.
chebee7i -
Show More
@@ -1,483 +1,494 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Defines a variety of Pygments lexers for highlighting IPython code.
3 Defines a variety of Pygments lexers for highlighting IPython code.
4
4
5 This includes:
5 This includes:
6
6
7 IPythonLexer
7 IPythonLexer
8 IPython3Lexer
8 IPython3Lexer
9 Lexers for pure IPython (python + magic/shell commands)
9 Lexers for pure IPython (python + magic/shell commands)
10
10
11 IPythonPartialTracebackLexer
11 IPythonPartialTracebackLexer
12 IPythonTracebackLexer
12 IPythonTracebackLexer
13 Supports 2.x and 3.x via keyword `python3`. The partial traceback
13 Supports 2.x and 3.x via keyword `python3`. The partial traceback
14 lexer reads everything but the Python code appearing in a traceback.
14 lexer reads everything but the Python code appearing in a traceback.
15 The full lexer combines the partial lexer with an IPython lexer.
15 The full lexer combines the partial lexer with an IPython lexer.
16
16
17 IPythonConsoleLexer
17 IPythonConsoleLexer
18 A lexer for IPython console sessions, with support for tracebacks.
18 A lexer for IPython console sessions, with support for tracebacks.
19
19
20 IPyLexer
20 IPyLexer
21 A friendly lexer which examines the first line of text and from it,
21 A friendly lexer which examines the first line of text and from it,
22 decides whether to use an IPython lexer or an IPython console lexer.
22 decides whether to use an IPython lexer or an IPython console lexer.
23 This is probably the only lexer that needs to be explicitly added
23 This is probably the only lexer that needs to be explicitly added
24 to Pygments.
24 to Pygments.
25
25
26 """
26 """
27 #-----------------------------------------------------------------------------
27 #-----------------------------------------------------------------------------
28 # Copyright (c) 2013, the IPython Development Team.
28 # Copyright (c) 2013, the IPython Development Team.
29 #
29 #
30 # Distributed under the terms of the Modified BSD License.
30 # Distributed under the terms of the Modified BSD License.
31 #
31 #
32 # The full license is in the file COPYING.txt, distributed with this software.
32 # The full license is in the file COPYING.txt, distributed with this software.
33 #-----------------------------------------------------------------------------
33 #-----------------------------------------------------------------------------
34
34
35 # Standard library
35 # Standard library
36 import re
36 import re
37
37
38 # Third party
38 # Third party
39 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
39 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
40 from pygments.lexer import (
40 from pygments.lexer import (
41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
42 )
42 )
43 from pygments.token import (
43 from pygments.token import (
44 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
44 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
45 )
45 )
46 from pygments.util import get_bool_opt
46 from pygments.util import get_bool_opt
47
47
48 # Local
48 # Local
49 from IPython.testing.skipdoctest import skip_doctest
49 from IPython.testing.skipdoctest import skip_doctest
50
50
# Matches one line at a time, including its trailing newline.
line_re = re.compile('.*?\n')

# Token rules layered on top of the inherited Python rules to recognize
# IPython-specific syntax:
#   * %magic / %%cellmagic followed by arguments (arguments are
#     highlighted with the Bash lexer),
#   * bare %magic names,
#   * !shell commands (command text highlighted with the Bash lexer).
ipython_tokens = [
    # BUGFIX: the argument group was (\.*), which matches only literal
    # dots and therefore never captured real magic arguments; (.*) does.
    (r'(\%+)(\w+)\s+(.*)(\n)', bygroups(Operator, Keyword,
                                        using(BashLexer), Text)),
    (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
    (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
59
59
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    Returns
    -------
    type
        A new lexer class derived from `Python3Lexer` or `PythonLexer`.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython']
        doc = """IPython Lexer"""

    # Prepend the IPython-specific rules so they take precedence over the
    # inherited Python 'root' rules.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    # NOTE(review): the original also computed a `clsname` such as
    # 'IPython3Lexer' but never used it -- the class __name__ is `name`.
    # The dead local has been removed; behavior is unchanged.
    return type(name, (PyLexer,), attrs)
96
96
97
97
98 IPython3Lexer = build_ipy_lexer(python3=True)
98 IPython3Lexer = build_ipy_lexer(python3=True)
99 IPythonLexer = build_ipy_lexer(python3=False)
99 IPythonLexer = build_ipy_lexer(python3=False)
100
100
101
101
class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^( File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            # IPythonTracebackLexer (a DelegatingLexer) hands everything
            # tagged Other to an IPython lexer for Python highlighting.
            (r'.*\n', Other),
        ],
    }
149
149
150
150
class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception to the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # This is a DelegatingLexer: the text is first run through the partial
    # IPython traceback lexer, and anything it tags with the "Other" token
    # is then re-lexed by the root lexer -- an IPython lexer whose flavor
    # (2.x vs 3.x) is selected by the boolean `python3` option.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        # Pick the root lexer; named `root_lexer` rather than `IPyLexer`
        # to avoid shadowing the module-level IPyLexer class.
        root_lexer = IPython3Lexer if self.python3 else IPythonLexer
        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)
180
180
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythoncon

            In [1]: raise Exception
            ---------------------------------------------------------------------------
            Exception Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythoncon']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template = 'In [\#]: '
    #     c.PromptManager.in2_template = ' .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    # NOTE(review): prompt whitespace here matches the source as rendered;
    # confirm the continuation prompt's leading-space count against a live
    # IPython session.
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r' \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason can't just use the rstrip'd variants instead is because
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all. An explicit mapping replaces the
        # previous fragile `locals()[attr]` lookup, and the builtin
        # setattr replaces `self.__setattr__`.
        patterns = {
            'in1_regex': in1_regex,
            'in2_regex': in2_regex,
            'out_regex': out_regex,
            'in1_regex_rstrip': in1_regex_rstrip,
            'in2_regex_rstrip': in2_regex_rstrip,
            'out_regex_rstrip': out_regex_rstrip,
        }
        for attr, pattern in patterns.items():
            setattr(self, attr, re.compile(pattern))

        Lexer.__init__(self, **options)

        # Only the Python lexer depends on `python3`; the traceback lexer
        # is the same class either way (it receives `python3` via
        # **options). The previous if/else duplicated this assignment.
        pylexer = IPython3Lexer if self.python3 else IPythonLexer
        tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        # Start in 'output' mode: everything is output until a prompt or
        # a traceback start line is seen.
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        ### Check for possible end of input
        ###
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        ### Check for output prompt
        ###
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion

        ### Check for input or continuation prompt (non stripped version)
        ###
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        ### Check for input or continuation prompt (stripped version)
        ###
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        ### Check for traceback
        ###
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        ### All other stuff...
        ###
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        """Lex `text` line by line, switching between the output, input,
        and traceback sub-lexers as prompts/tracebacks are detected."""
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        # Flush whatever remains in the buffer. (This was previously a
        # for/else clause; since the loop has no `break`, the else-branch
        # always ran, so a plain post-loop flush is equivalent and clearer.)
        for token in self.buffered_tokens():
            yield token
464
class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

        # Build both delegate lexers up front with the same options
        # (notably `python3`); dispatch happens per call.
        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # A leading input prompt marks a console transcript; otherwise the
        # whole text is treated as plain IPython code.
        looks_like_console = re.match(r'(In \[[0-9]+\]:)', text.strip())
        delegate = (self.IPythonConsoleLexer if looks_like_console
                    else self.IPythonLexer)
        for token in delegate.get_tokens_unprocessed(text):
            yield token
483
494
@@ -1,58 +1,62 b''
1 New IPython Console Lexer
1 New IPython Console Lexer
2 -------------------------
2 -------------------------
3
3
4 The IPython console lexer has been rewritten and now supports tracebacks
4 The IPython console lexer has been rewritten and now supports tracebacks
5 and customized input/output prompts. An entire suite of lexers is now
5 and customized input/output prompts. An entire suite of lexers is now
6 available at :module:`IPython.nbconvert.utils.lexers`. These include:
6 available at :module:`IPython.nbconvert.utils.lexers`. These include:
7
7
8 IPythonLexer
8 IPythonLexer
9 IPython3Lexer
9 IPython3Lexer
10 Lexers for pure IPython (python + magic/shell commands)
10 Lexers for pure IPython (python + magic/shell commands)
11
11
12 IPythonPartialTracebackLexer
12 IPythonPartialTracebackLexer
13 IPythonTracebackLexer
13 IPythonTracebackLexer
14 Supports 2.x and 3.x via the keyword `python3`. The partial traceback
14 Supports 2.x and 3.x via the keyword `python3`. The partial traceback
15 lexer reads everything but the Python code appearing in a traceback.
15 lexer reads everything but the Python code appearing in a traceback.
16 The full lexer combines the partial lexer with an IPython lexer.
16 The full lexer combines the partial lexer with an IPython lexer.
17
17
18 IPythonConsoleLexer
18 IPythonConsoleLexer
19 A lexer for IPython console sessions, with support for tracebacks.
19 A lexer for IPython console sessions, with support for tracebacks.
20 Supports 2.x and 3.x via the keyword `python3`.
20 Supports 2.x and 3.x via the keyword `python3`.
21
21
22 IPyLexer
22 IPyLexer
23 A friendly lexer which examines the first line of text and from it,
23 A friendly lexer which examines the first line of text and from it,
24 decides whether to use an IPython lexer or an IPython console lexer.
24 decides whether to use an IPython lexer or an IPython console lexer.
25 Supports 2.x and 3.x via the keyword `python3`.
25 Supports 2.x and 3.x via the keyword `python3`.
26
26
27 Previously, the :class:`IPythonConsoleLexer` class was available at
27 Previously, the :class:`IPythonConsoleLexer` class was available at
28 :module:`IPython.sphinxext.ipython_console_hightlight`. It was inserted
28 :module:`IPython.sphinxext.ipython_console_hightlight`. It was inserted
29 into Pygments' list of available lexers under the name `ipython`. It should
29 into Pygments' list of available lexers under the name `ipython`. It should
30 be mentioned that this name is inaccurate. An IPython console session
30 be mentioned that this name is inaccurate, since an IPython console session
31 is not the same as IPython code (which itself is a superset of the Python
31 is not the same as IPython code (which itself is a superset of the Python
32 language).
32 language).
33
33
34 Now, the Sphinx extension inserts two console lexers into Pygment's list of
34 Now, the Sphinx extension inserts two console lexers into Pygments' list of
35 available lexers. Both are IPyLexer instances under the names: `ipython` and
35 available lexers. Both are IPyLexer instances under the names: `ipython` and
36 `ipython3`. As mentioned above, these names are misleading, but they are kept
36 `ipython3`. Although the names can be confusing (as mentioned above), their
37 for backwards compatibility and typical usage. If a project needs to make
37 continued use is, in part, to maintain backwards compatibility and to
38 Pygments aware of more than just the IPyLexer class, then one should not
38 aid typical usage. If a project needs to make Pygments aware of more than just
39 make the IPyLexer class available under the name `ipython` and use `ipy` or
39 the IPyLexer class, then one should not make the IPyLexer class available under
40 some other non-conflicting value.
40 the name `ipython` and use `ipy` or some other non-conflicting value.
41
41
42 Code blocks such as::
42 Code blocks such as:
43
44 .. code-block:: rst
43
45
44 .. code-block:: ipython
46 .. code-block:: ipython
45
47
46 In [1]: 2**2
48 In [1]: 2**2
47 Out[1]: 4
49 Out[1]: 4
48
50
49 will continue to work as before, but now, they will also properly highlight
51 will continue to work as before, but now, they will also properly highlight
50 tracebacks. For pure IPython code, the same lexer will work::
52 tracebacks. For pure IPython code, the same lexer will also work:
53
54 .. code-block:: rst
51
55
52 .. code-block:: ipython
56 .. code-block:: ipython
53
57
54 x = ''.join(map(str, range(10)))
58 x = ''.join(map(str, range(10)))
55 !echo $x
59 !echo $x
56
60
57 Since the first line of the block did not begin with a standard IPython console
61 Since the first line of the block did not begin with a standard IPython console
58 prompt, the entire block is assumed to be IPython code instead.
62 prompt, the entire block is assumed to consist of IPython code instead.
General Comments 0
You need to be logged in to leave comments. Login now