Clean up aliases for lexers.
chebee7i
@@ -1,494 +1,507 @@
 # -*- coding: utf-8 -*-
 """
 Defines a variety of Pygments lexers for highlighting IPython code.
 
 This includes:
 
 IPythonLexer
 IPython3Lexer
     Lexers for pure IPython (python + magic/shell commands)
 
 IPythonPartialTracebackLexer
 IPythonTracebackLexer
     Supports 2.x and 3.x via keyword `python3`. The partial traceback
     lexer reads everything but the Python code appearing in a traceback.
     The full lexer combines the partial lexer with an IPython lexer.
 
 IPythonConsoleLexer
     A lexer for IPython console sessions, with support for tracebacks.
 
 IPyLexer
     A friendly lexer which examines the first line of text and, from it,
     decides whether to use an IPython lexer or an IPython console lexer.
     This is probably the only lexer that needs to be explicitly added
     to Pygments.
 
 """
 #-----------------------------------------------------------------------------
 # Copyright (c) 2013, the IPython Development Team.
 #
 # Distributed under the terms of the Modified BSD License.
 #
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
 # Standard library
 import re
 
 # Third party
 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
 from pygments.lexer import (
     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
 )
 from pygments.token import (
     Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
 )
 from pygments.util import get_bool_opt
 
 # Local
 from IPython.testing.skipdoctest import skip_doctest
 
 line_re = re.compile('.*?\n')
 
 ipython_tokens = [
     (r'(\%+)(\w+)\s+(\.*)(\n)', bygroups(Operator, Keyword,
                                          using(BashLexer), Text)),
     (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
     (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
 ]
 
 def build_ipy_lexer(python3):
     """Builds IPython lexers depending on the value of `python3`.
 
     The lexer inherits from an appropriate Python lexer and then adds
     information about IPython specific keywords (i.e. magic commands,
     shell commands, etc.)
 
     Parameters
     ----------
     python3 : bool
         If `True`, then build an IPython lexer from a Python 3 lexer.
 
     """
     # It would be nice to have a single IPython lexer class which takes
     # a boolean `python3`. But since there are two Python lexer classes,
     # we will also have two IPython lexer classes.
     if python3:
         PyLexer = Python3Lexer
         clsname = 'IPython3Lexer'
         name = 'IPython3'
         aliases = ['ipython3']
         doc = """IPython3 Lexer"""
     else:
         PyLexer = PythonLexer
         clsname = 'IPythonLexer'
         name = 'IPython'
-        aliases = ['ipython']
+        aliases = ['ipython2', 'ipython']
         doc = """IPython Lexer"""
 
     tokens = PyLexer.tokens.copy()
     tokens['root'] = ipython_tokens + tokens['root']
 
     attrs = {'name': name, 'aliases': aliases,
              '__doc__': doc, 'tokens': tokens}
 
     return type(name, (PyLexer,), attrs)
 
 
 IPython3Lexer = build_ipy_lexer(python3=True)
 IPythonLexer = build_ipy_lexer(python3=False)
 
 
 class IPythonPartialTracebackLexer(RegexLexer):
     """
     Partial lexer for IPython tracebacks.
 
     Handles all the non-python output. This works for both Python 2.x and 3.x.
 
     """
     name = 'IPython Partial Traceback'
 
     tokens = {
         'root': [
             # Tracebacks for syntax errors have a different style.
             # For both types of tracebacks, we mark the first line with
             # Generic.Traceback. For syntax errors, we mark the filename
             # as we mark the filenames for non-syntax tracebacks.
             #
             # These two regexps define how IPythonConsoleLexer finds a
             # traceback.
             #
             ## Non-syntax traceback
             (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
             ## Syntax traceback
             (r'^(  File)(.*)(, line )(\d+\n)',
              bygroups(Generic.Traceback, Name.Namespace,
                       Generic.Traceback, Literal.Number.Integer)),
 
             # (Exception Identifier)(Whitespace)(Traceback Message)
             (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
              bygroups(Name.Exception, Generic.Whitespace, Text)),
             # (Module/Filename)(Text)(Callee)(Function Signature)
             # Better options for callee and function signature?
             (r'(.*)( in )(.*)(\(.*\)\n)',
              bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
             # Regular line: (Whitespace)(Line Number)(Python Code)
             (r'(\s*?)(\d+)(.*?\n)',
              bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
             # Emphasized line: (Arrow)(Line Number)(Python Code)
             # Using Exception token so arrow color matches the Exception.
             (r'(-*>?\s?)(\d+)(.*?\n)',
              bygroups(Name.Exception, Literal.Number.Integer, Other)),
             # (Exception Identifier)(Message)
             (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
              bygroups(Name.Exception, Text)),
             # Tag everything else as Other, will be handled later.
             (r'.*\n', Other),
         ],
     }
 
 
 class IPythonTracebackLexer(DelegatingLexer):
     """
     IPython traceback lexer.
 
     For doctests, the tracebacks can be snipped as much as desired with the
     exception of the lines that designate a traceback. For non-syntax error
     tracebacks, this is the line of hyphens. For syntax error tracebacks,
     this is the line which lists the File and line number.
 
     """
     # The lexer inherits from DelegatingLexer. The "root" lexer is an
     # appropriate IPython lexer, which depends on the value of the boolean
     # `python3`. First, we parse with the partial IPython traceback lexer.
     # Then, any code marked with the "Other" token is delegated to the root
     # lexer.
     #
     name = 'IPython Traceback'
     aliases = ['ipythontb']
 
     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
+        if self.python3:
+            self.aliases = ['ipythontb3']
+        else:
+            self.aliases = ['ipythontb2', 'ipythontb']
 
         if self.python3:
             IPyLexer = IPython3Lexer
         else:
             IPyLexer = IPythonLexer
 
         DelegatingLexer.__init__(self, IPyLexer,
                                  IPythonPartialTracebackLexer, **options)
 
 @skip_doctest
 class IPythonConsoleLexer(Lexer):
     """
     An IPython console lexer for IPython code-blocks and doctests, such as:
 
     .. code-block:: rst
 
         .. code-block:: ipythoncon
 
             In [1]: a = 'foo'
 
             In [2]: a
             Out[2]: 'foo'
 
             In [3]: print a
             foo
 
             In [4]: 1 / 0
 
 
     Support is also provided for IPython exceptions:
 
     .. code-block:: rst
 
         .. code-block:: ipythoncon
 
             In [1]: raise Exception
             ---------------------------------------------------------------------------
             Exception                                 Traceback (most recent call last)
             <ipython-input-1-fca2ab0ca76b> in <module>()
             ----> 1 raise Exception
 
             Exception:
 
     """
     name = 'IPython console session'
     aliases = ['ipythoncon']
     mimetypes = ['text/x-ipython-console']
 
     # The regexps used to determine what is input and what is output.
     # The default prompts for IPython are:
     #
     #     c.PromptManager.in_template  = 'In [\#]: '
     #     c.PromptManager.in2_template = '   .\D.: '
     #     c.PromptManager.out_template = 'Out[\#]: '
     #
     in1_regex = r'In \[[0-9]+\]: '
     in2_regex = r'   \.\.+\.: '
     out_regex = r'Out\[[0-9]+\]: '
 
     #: The regex to determine when a traceback starts.
     ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
 
     def __init__(self, **options):
         """Initialize the IPython console lexer.
 
         Parameters
         ----------
         python3 : bool
             If `True`, then the console inputs are parsed using a Python 3
             lexer. Otherwise, they are parsed using a Python 2 lexer.
         in1_regex : RegexObject
             The compiled regular expression used to detect the start
             of inputs. Although the IPython configuration setting may have a
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         in2_regex : RegexObject
             The compiled regular expression used to detect the continuation
             of inputs. Although the IPython configuration setting may have a
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         out_regex : RegexObject
             The compiled regular expression used to detect outputs. If `None`,
             then the default output prompt is assumed.
 
         """
         self.python3 = get_bool_opt(options, 'python3', False)
+        if self.python3:
+            self.aliases = ['ipythoncon3']
+        else:
+            self.aliases = ['ipythoncon2', 'ipythoncon']
 
         in1_regex = options.get('in1_regex', self.in1_regex)
         in2_regex = options.get('in2_regex', self.in2_regex)
         out_regex = options.get('out_regex', self.out_regex)
 
         # So that we can work with input and output prompts which have been
         # rstrip'd (possibly by editors) we also need rstrip'd variants. If
         # we do not do this, then such prompts will be tagged as 'output'.
         # The reason we can't just use the rstrip'd variants instead is that
         # we want any whitespace associated with the prompt to be inserted
         # with the token. This allows formatted code to be modified so as to
         # hide the appearance of prompts, with the whitespace included. One
         # example use of this is in copybutton.js from the standard Python docs.
         in1_regex_rstrip = in1_regex.rstrip() + '\n'
         in2_regex_rstrip = in2_regex.rstrip() + '\n'
         out_regex_rstrip = out_regex.rstrip() + '\n'
 
         # Compile and save them all.
         attrs = ['in1_regex', 'in2_regex', 'out_regex',
                  'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
         for attr in attrs:
             self.__setattr__(attr, re.compile(locals()[attr]))
 
         Lexer.__init__(self, **options)
 
         if self.python3:
             pylexer = IPython3Lexer
             tblexer = IPythonTracebackLexer
         else:
             pylexer = IPythonLexer
             tblexer = IPythonTracebackLexer
 
         self.pylexer = pylexer(**options)
         self.tblexer = tblexer(**options)
 
         self.reset()
 
     def reset(self):
         self.mode = 'output'
         self.index = 0
         self.buffer = u''
         self.insertions = []
 
     def buffered_tokens(self):
         """
         Generator of unprocessed tokens after doing insertions and before
         changing to a new state.
 
         """
         if self.mode == 'output':
             tokens = [(0, Generic.Output, self.buffer)]
         elif self.mode == 'input':
             tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
         else: # traceback
             tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
 
         for i, t, v in do_insertions(self.insertions, tokens):
             # All token indexes are relative to the buffer.
             yield self.index + i, t, v
 
         # Clear it all
         self.index += len(self.buffer)
         self.buffer = u''
         self.insertions = []
 
     def get_mci(self, line):
         """
         Parses the line and returns a 3-tuple: (mode, code, insertion).
 
         `mode` is the next mode (or state) of the lexer, and is always equal
         to 'input', 'output', or 'tb'.
 
         `code` is a portion of the line that should be added to the buffer
         corresponding to the next mode and eventually lexed by another lexer.
         For example, `code` could be Python code if `mode` were 'input'.
 
         `insertion` is a 3-tuple (index, token, text) representing an
         unprocessed "token" that will be inserted into the stream of tokens
         that are created from the buffer once we change modes. This is usually
         the input or output prompt.
 
         In general, the next mode depends on the current mode and on the
         contents of `line`.
 
         """
         # To reduce the number of regex match checks, we have multiple
         # 'if' blocks instead of 'if-elif' blocks.
 
         ### Check for possible end of input
         ###
         in2_match = self.in2_regex.match(line)
         in2_match_rstrip = self.in2_regex_rstrip.match(line)
         if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
            in2_match_rstrip:
             end_input = True
         else:
             end_input = False
         if end_input and self.mode != 'tb':
             # Only look for an end of input when not in tb mode.
             # An ellipsis could appear within the traceback.
             mode = 'output'
             code = u''
             insertion = (0, Generic.Prompt, line)
             return mode, code, insertion
 
         ### Check for output prompt
         ###
         out_match = self.out_regex.match(line)
         out_match_rstrip = self.out_regex_rstrip.match(line)
         if out_match or out_match_rstrip:
             mode = 'output'
             if out_match:
                 idx = out_match.end()
             else:
                 idx = out_match_rstrip.end()
             code = line[idx:]
             # Use the 'heading' token for output. We cannot use Generic.Error
             # since it would conflict with exceptions.
             insertion = (0, Generic.Heading, line[:idx])
             return mode, code, insertion
 
 
         ### Check for input or continuation prompt (non stripped version)
         ###
         in1_match = self.in1_regex.match(line)
         if in1_match or (in2_match and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match:
                 idx = in1_match.end()
             else: # in2_match
                 idx = in2_match.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion
 
         ### Check for input or continuation prompt (stripped version)
         ###
         in1_match_rstrip = self.in1_regex_rstrip.match(line)
         if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match_rstrip:
                 idx = in1_match_rstrip.end()
             else: # in2_match
                 idx = in2_match_rstrip.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion
 
         ### Check for traceback
         ###
         if self.ipytb_start.match(line):
             mode = 'tb'
             code = line
             insertion = None
             return mode, code, insertion
 
         ### All other stuff...
         ###
         if self.mode in ('input', 'output'):
             # We assume all other text is output. Multiline input that
             # does not use the continuation marker cannot be detected.
             # For example, the 3 in the following is clearly output:
             #
             #    In [1]: print 3
             #    3
             #
             # But the following second line is part of the input:
             #
             #    In [2]: while True:
             #        print True
             #
             # In both cases, the 2nd line will be 'output'.
             #
             mode = 'output'
         else:
             mode = 'tb'
 
         code = line
         insertion = None
 
         return mode, code, insertion
 
     def get_tokens_unprocessed(self, text):
         self.reset()
         for match in line_re.finditer(text):
             line = match.group()
             mode, code, insertion = self.get_mci(line)
 
             if mode != self.mode:
                 # Yield buffered tokens before transitioning to new mode.
                 for token in self.buffered_tokens():
                     yield token
                 self.mode = mode
 
             if insertion:
                 self.insertions.append((len(self.buffer), [insertion]))
             self.buffer += code
         else:
             for token in self.buffered_tokens():
                 yield token
 
 class IPyLexer(Lexer):
     """
     Primary lexer for all IPython-like code.
 
     This is a simple helper lexer. If the first line of the text begins with
     "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
     lexer. If not, then the entire text is parsed with an IPython lexer.
 
     The goal is to reduce the number of lexers that are registered
     with Pygments.
 
     """
     name = 'IPy session'
     aliases = ['ipy']
 
     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
+        if self.python3:
+            self.aliases = ['ipy3']
+        else:
+            self.aliases = ['ipy2', 'ipy']
+
         Lexer.__init__(self, **options)
 
         self.IPythonLexer = IPythonLexer(**options)
         self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
 
     def get_tokens_unprocessed(self, text):
         if re.match(r'(In \[[0-9]+\]:)', text.strip()):
             lex = self.IPythonConsoleLexer
         else:
             lex = self.IPythonLexer
         for token in lex.get_tokens_unprocessed(text):
             yield token
 
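For a quick sanity check of the hunk above, the lexers can be driven through Pygments directly, outside of Sphinx. This is a minimal sketch, not part of the commit; the module path IPython.nbconvert.utils.lexers is an assumption here, inferred from the relative import in the Sphinx extension below:

    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    from IPython.nbconvert.utils.lexers import IPyLexer

    # A console session: the first line starts with an 'In [N]: ' prompt,
    # so IPyLexer hands the whole text to IPythonConsoleLexer.
    session = u"In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"

    # Plain IPython code: no prompt on the first line, so the script-style
    # IPython lexer is used instead.
    script = u"%timeit range(10)\n"

    lexer = IPyLexer(python3=False)  # instance aliases become ['ipy2', 'ipy']
    print(highlight(session, lexer, HtmlFormatter()))
    print(highlight(script, lexer, HtmlFormatter()))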
@@ -1,27 +1,27 @@
1 """
1 """
2 reST directive for syntax-highlighting ipython interactive sessions.
2 reST directive for syntax-highlighting ipython interactive sessions.
3
3
4 """
4 """
5
5
6 from sphinx import highlighting
6 from sphinx import highlighting
7 from ..nbconvert.utils.lexers import IPyLexer
7 from ..nbconvert.utils.lexers import IPyLexer
8
8
9 def setup(app):
9 def setup(app):
10 """Setup as a sphinx extension."""
10 """Setup as a sphinx extension."""
11
11
12 # This is only a lexer, so adding it below to pygments appears sufficient.
12 # This is only a lexer, so adding it below to pygments appears sufficient.
13 # But if somebody knows what the right API usage should be to do that via
13 # But if somebody knows what the right API usage should be to do that via
14 # sphinx, by all means fix it here. At least having this setup.py
14 # sphinx, by all means fix it here. At least having this setup.py
15 # suppresses the sphinx warning we'd get without it.
15 # suppresses the sphinx warning we'd get without it.
16 pass
16 pass
17
17
18 # Register the extension as a valid pygments lexer.
18 # Register the extension as a valid pygments lexer.
19 # Alternatively, we could register the lexer with pygments instead. This would
19 # Alternatively, we could register the lexer with pygments instead. This would
20 # require using setuptools entrypoints: http://pygments.org/docs/plugins
20 # require using setuptools entrypoints: http://pygments.org/docs/plugins
21
21
22 ipy = IPyLexer(python3=False)
22 ipy2 = IPyLexer(python3=False)
23 ipy3 = IPyLexer(python3=True)
23 ipy3 = IPyLexer(python3=True)
24 ipy3.aliases = ['ipy3']
25
24
26 highlighting.lexers['ipython'] = ipy
25 highlighting.lexers['ipython'] = ipy2
26 highlighting.lexers['ipython2'] = ipy2
27 highlighting.lexers['ipython3'] = ipy3
27 highlighting.lexers['ipython3'] = ipy3
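With these registrations, a Sphinx project that loads this extension can highlight blocks marked ipython, ipython2, or ipython3; note that the ipythontb*/ipythoncon* aliases set in the lexer constructors above are not registered here, only the three ipython* names. A minimal conf.py sketch; the extension's import path is an assumption, not taken from this diff:

    # conf.py (sketch): load the extension that populates
    # sphinx.highlighting.lexers. The module path below is assumed.
    extensions = [
        'IPython.sphinxext.ipython_console_highlighting',
    ]

After that, a directive such as `.. code-block:: ipython3` in a reST document is rendered with IPyLexer(python3=True).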