Closes #7558: Added a rule for cell magics.
Lev Abalkin
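
To see what the new rule does, here is a minimal sketch that runs a cell
magic through the lexer and prints the resulting tokens. It is hedged: the
import path below is an assumption inferred from the tests' relative import
(`from .. import lexers`), so adjust it to match your checkout.

    # Hedged sketch: exercise the new cell-magic rule.
    # The module path is an assumption; adjust to your tree.
    from IPython.nbconvert.utils.lexers import IPythonLexer

    code = "%%timeit x = range(10000)\nmax(x)\n"
    for token_type, value in IPythonLexer().get_tokens(code):
        print(token_type, repr(value))

    # Per the new rule (?s)(\s*)(%%)(\w+)(.*): '%%' lexes as Operator,
    # 'timeit' as Keyword, and the rest of the cell body as Text.
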
@@ -1,506 +1,507 @@
 # -*- coding: utf-8 -*-
 """
 Defines a variety of Pygments lexers for highlighting IPython code.

 This includes:

     IPythonLexer, IPython3Lexer
         Lexers for pure IPython (python + magic/shell commands)

     IPythonPartialTracebackLexer, IPythonTracebackLexer
         Supports 2.x and 3.x via the keyword `python3`. The partial traceback
         lexer reads everything but the Python code appearing in a traceback.
         The full lexer combines the partial lexer with an IPython lexer.

     IPythonConsoleLexer
         A lexer for IPython console sessions, with support for tracebacks.

     IPyLexer
         A friendly lexer which examines the first line of text and, from it,
         decides whether to use an IPython lexer or an IPython console lexer.
         This is probably the only lexer that needs to be explicitly added
         to Pygments.

 """
 #-----------------------------------------------------------------------------
 # Copyright (c) 2013, the IPython Development Team.
 #
 # Distributed under the terms of the Modified BSD License.
 #
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------

 # Standard library
 import re

 # Third party
 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
 from pygments.lexer import (
     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
 )
 from pygments.token import (
     Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
 )
 from pygments.util import get_bool_opt

 # Local
 from IPython.testing.skipdoctest import skip_doctest

 line_re = re.compile('.*?\n')

 ipython_tokens = [
+    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
     (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                             using(BashLexer), Text)),
     (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
     (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
     (r'^(.+)(=)(\s*)(!)(.+)(\n)', bygroups(
         # With the limited syntax allowed on the l.h.s. of a shell capture,
         # we don't need to differentiate between Python 2 and 3.
         using(Python3Lexer), Operator, Text, Operator, using(BashLexer), Text)),
 ]

 def build_ipy_lexer(python3):
     """Builds IPython lexers depending on the value of `python3`.

     The lexer inherits from an appropriate Python lexer and then adds
     information about IPython-specific keywords (i.e. magic commands,
     shell commands, etc.)

     Parameters
     ----------
     python3 : bool
         If `True`, then build an IPython lexer from a Python 3 lexer.

     """
     # It would be nice to have a single IPython lexer class which takes
     # a boolean `python3`. But since there are two Python lexer classes,
     # we will also have two IPython lexer classes.
     if python3:
         PyLexer = Python3Lexer
         clsname = 'IPython3Lexer'
         name = 'IPython3'
         aliases = ['ipython3']
         doc = """IPython3 Lexer"""
     else:
         PyLexer = PythonLexer
         clsname = 'IPythonLexer'
         name = 'IPython'
         aliases = ['ipython2', 'ipython']
         doc = """IPython Lexer"""

     tokens = PyLexer.tokens.copy()
     tokens['root'] = ipython_tokens + tokens['root']

     attrs = {'name': name, 'aliases': aliases,
              '__doc__': doc, 'tokens': tokens}

     return type(name, (PyLexer,), attrs)


 IPython3Lexer = build_ipy_lexer(python3=True)
 IPythonLexer = build_ipy_lexer(python3=False)


 class IPythonPartialTracebackLexer(RegexLexer):
     """
     Partial lexer for IPython tracebacks.

     Handles all the non-Python output. This works for both Python 2.x and 3.x.

     """
     name = 'IPython Partial Traceback'

     tokens = {
         'root': [
             # Tracebacks for syntax errors have a different style.
             # For both types of tracebacks, we mark the first line with
             # Generic.Traceback. For syntax errors, we mark the filename
             # as we mark the filenames for non-syntax tracebacks.
             #
             # These two regexps define how IPythonConsoleLexer finds a
             # traceback.
             #
             ## Non-syntax traceback
             (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
             ## Syntax traceback
             (r'^(  File)(.*)(, line )(\d+\n)',
              bygroups(Generic.Traceback, Name.Namespace,
                       Generic.Traceback, Literal.Number.Integer)),

             # (Exception Identifier)(Whitespace)(Traceback Message)
             (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
              bygroups(Name.Exception, Generic.Whitespace, Text)),
             # (Module/Filename)(Text)(Callee)(Function Signature)
             # Better options for callee and function signature?
             (r'(.*)( in )(.*)(\(.*\)\n)',
              bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
             # Regular line: (Whitespace)(Line Number)(Python Code)
             (r'(\s*?)(\d+)(.*?\n)',
              bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
             # Emphasized line: (Arrow)(Line Number)(Python Code)
             # Using Exception token so arrow color matches the Exception.
             (r'(-*>?\s?)(\d+)(.*?\n)',
              bygroups(Name.Exception, Literal.Number.Integer, Other)),
             # (Exception Identifier)(Message)
             (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
              bygroups(Name.Exception, Text)),
             # Tag everything else as Other; it will be handled later.
             (r'.*\n', Other),
         ],
     }


 class IPythonTracebackLexer(DelegatingLexer):
     """
     IPython traceback lexer.

     For doctests, the tracebacks can be snipped as much as desired with the
     exception of the lines that designate a traceback. For non-syntax error
     tracebacks, this is the line of hyphens. For syntax error tracebacks,
     this is the line which lists the File and line number.

     """
     # The lexer inherits from DelegatingLexer. The "root" lexer is an
     # appropriate IPython lexer, which depends on the value of the boolean
     # `python3`. First, we parse with the partial IPython traceback lexer.
     # Then, any code marked with the "Other" token is delegated to the root
     # lexer.
     #
     name = 'IPython Traceback'
     aliases = ['ipythontb']

     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3tb']
         else:
             self.aliases = ['ipython2tb', 'ipythontb']

         if self.python3:
             IPyLexer = IPython3Lexer
         else:
             IPyLexer = IPythonLexer

         DelegatingLexer.__init__(self, IPyLexer,
                                  IPythonPartialTracebackLexer, **options)

 @skip_doctest
 class IPythonConsoleLexer(Lexer):
     """
     An IPython console lexer for IPython code-blocks and doctests, such as:

     .. code-block:: rst

         .. code-block:: ipythonconsole

             In [1]: a = 'foo'

             In [2]: a
             Out[2]: 'foo'

             In [3]: print a
             foo

             In [4]: 1 / 0


     Support is also provided for IPython exceptions:

     .. code-block:: rst

         .. code-block:: ipythonconsole

             In [1]: raise Exception

             ---------------------------------------------------------------------------
             Exception                                 Traceback (most recent call last)
             <ipython-input-1-fca2ab0ca76b> in <module>()
             ----> 1 raise Exception

             Exception:

     """
     name = 'IPython console session'
     aliases = ['ipythonconsole']
     mimetypes = ['text/x-ipython-console']

     # The regexps used to determine what is input and what is output.
     # The default prompts for IPython are:
     #
     #     c.PromptManager.in_template  = 'In [\#]: '
     #     c.PromptManager.in2_template = '   .\D.: '
     #     c.PromptManager.out_template = 'Out[\#]: '
     #
     in1_regex = r'In \[[0-9]+\]: '
     in2_regex = r'   \.\.+\.: '
     out_regex = r'Out\[[0-9]+\]: '

     #: The regex to determine when a traceback starts.
     ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

     def __init__(self, **options):
         """Initialize the IPython console lexer.

         Parameters
         ----------
         python3 : bool
             If `True`, then the console inputs are parsed using a Python 3
             lexer. Otherwise, they are parsed using a Python 2 lexer.
         in1_regex : RegexObject
             The compiled regular expression used to detect the start
             of inputs. Although the IPython configuration setting may have
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         in2_regex : RegexObject
             The compiled regular expression used to detect the continuation
             of inputs. Although the IPython configuration setting may have
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         out_regex : RegexObject
             The compiled regular expression used to detect outputs. If `None`,
             then the default output prompt is assumed.

         """
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3console']
         else:
             self.aliases = ['ipython2console', 'ipythonconsole']

         in1_regex = options.get('in1_regex', self.in1_regex)
         in2_regex = options.get('in2_regex', self.in2_regex)
         out_regex = options.get('out_regex', self.out_regex)

         # So that we can work with input and output prompts which have been
         # rstrip'd (possibly by editors) we also need rstrip'd variants. If
         # we do not do this, then such prompts will be tagged as 'output'.
         # The reason we can't just use the rstrip'd variants instead is that
         # we want any whitespace associated with the prompt to be inserted
         # with the token. This allows formatted code to be modified so as to
         # hide the appearance of prompts, with the whitespace included. One
         # example use of this is in copybutton.js from the standard lib docs.
         in1_regex_rstrip = in1_regex.rstrip() + '\n'
         in2_regex_rstrip = in2_regex.rstrip() + '\n'
         out_regex_rstrip = out_regex.rstrip() + '\n'

         # Compile and save them all.
         attrs = ['in1_regex', 'in2_regex', 'out_regex',
                  'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
         for attr in attrs:
             self.__setattr__(attr, re.compile(locals()[attr]))

         Lexer.__init__(self, **options)

         if self.python3:
             pylexer = IPython3Lexer
             tblexer = IPythonTracebackLexer
         else:
             pylexer = IPythonLexer
             tblexer = IPythonTracebackLexer

         self.pylexer = pylexer(**options)
         self.tblexer = tblexer(**options)

         self.reset()

     def reset(self):
         self.mode = 'output'
         self.index = 0
         self.buffer = u''
         self.insertions = []

     def buffered_tokens(self):
         """
         Generator of unprocessed tokens after doing insertions and before
         changing to a new state.

         """
         if self.mode == 'output':
             tokens = [(0, Generic.Output, self.buffer)]
         elif self.mode == 'input':
             tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
         else: # traceback
             tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

         for i, t, v in do_insertions(self.insertions, tokens):
             # All token indexes are relative to the buffer.
             yield self.index + i, t, v

         # Clear it all
         self.index += len(self.buffer)
         self.buffer = u''
         self.insertions = []

     def get_mci(self, line):
         """
         Parses the line and returns a 3-tuple: (mode, code, insertion).

         `mode` is the next mode (or state) of the lexer, and is always equal
         to 'input', 'output', or 'tb'.

         `code` is a portion of the line that should be added to the buffer
         corresponding to the next mode and eventually lexed by another lexer.
         For example, `code` could be Python code if `mode` were 'input'.

         `insertion` is a 3-tuple (index, token, text) representing an
         unprocessed "token" that will be inserted into the stream of tokens
         that are created from the buffer once we change modes. This is usually
         the input or output prompt.

         In general, the next mode depends on the current mode and on the
         contents of `line`.

         """
         # To reduce the number of regex match checks, we have multiple
         # 'if' blocks instead of 'if-elif' blocks.

         # Check for possible end of input
         in2_match = self.in2_regex.match(line)
         in2_match_rstrip = self.in2_regex_rstrip.match(line)
         if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
            in2_match_rstrip:
             end_input = True
         else:
             end_input = False
         if end_input and self.mode != 'tb':
             # Only look for an end of input when not in tb mode.
             # An ellipsis could appear within the traceback.
             mode = 'output'
             code = u''
             insertion = (0, Generic.Prompt, line)
             return mode, code, insertion

         # Check for output prompt
         out_match = self.out_regex.match(line)
         out_match_rstrip = self.out_regex_rstrip.match(line)
         if out_match or out_match_rstrip:
             mode = 'output'
             if out_match:
                 idx = out_match.end()
             else:
                 idx = out_match_rstrip.end()
             code = line[idx:]
             # Use the 'heading' token for output. We cannot use Generic.Error
             # since it would conflict with exceptions.
             insertion = (0, Generic.Heading, line[:idx])
             return mode, code, insertion


         # Check for input or continuation prompt (non stripped version)
         in1_match = self.in1_regex.match(line)
         if in1_match or (in2_match and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match:
                 idx = in1_match.end()
             else: # in2_match
                 idx = in2_match.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion

         # Check for input or continuation prompt (stripped version)
         in1_match_rstrip = self.in1_regex_rstrip.match(line)
         if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match_rstrip:
                 idx = in1_match_rstrip.end()
             else: # in2_match
                 idx = in2_match_rstrip.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion

         # Check for traceback
         if self.ipytb_start.match(line):
             mode = 'tb'
             code = line
             insertion = None
             return mode, code, insertion

         # All other stuff...
         if self.mode in ('input', 'output'):
             # We assume all other text is output. Multiline input that
             # does not use the continuation marker cannot be detected.
             # For example, the 3 in the following is clearly output:
             #
             #    In [1]: print 3
             #    3
             #
             # But the following second line is part of the input:
             #
             #    In [2]: while True:
             #        print True
             #
             # In both cases, the 2nd line will be 'output'.
             #
             mode = 'output'
         else:
             mode = 'tb'

         code = line
         insertion = None

         return mode, code, insertion

     def get_tokens_unprocessed(self, text):
         self.reset()
         for match in line_re.finditer(text):
             line = match.group()
             mode, code, insertion = self.get_mci(line)

             if mode != self.mode:
                 # Yield buffered tokens before transitioning to new mode.
                 for token in self.buffered_tokens():
                     yield token
                 self.mode = mode

             if insertion:
                 self.insertions.append((len(self.buffer), [insertion]))
             self.buffer += code
         else:  # for-else: all lines consumed; flush any remaining buffer
             for token in self.buffered_tokens():
                 yield token

 class IPyLexer(Lexer):
     """
     Primary lexer for all IPython-like code.

     This is a simple helper lexer. If the first line of the text begins with
     "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
     lexer. If not, then the entire text is parsed with an IPython lexer.

     The goal is to reduce the number of lexers that are registered
     with Pygments.

     """
     name = 'IPy session'
     aliases = ['ipy']

     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipy3']
         else:
             self.aliases = ['ipy2', 'ipy']

         Lexer.__init__(self, **options)

         self.IPythonLexer = IPythonLexer(**options)
         self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

     def get_tokens_unprocessed(self, text):
         # Search for the input prompt anywhere...this allows code blocks to
         # begin with comments as well.
         if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
             lex = self.IPythonConsoleLexer
         else:
             lex = self.IPythonLexer
         for token in lex.get_tokens_unprocessed(text):
             yield token

@@ -1,79 +1,88 @@
1 """Test lexers module"""
1 """Test lexers module"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2014 The IPython Development Team
3 # Copyright (C) 2014 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 from pygments.token import Token
12 from pygments.token import Token
13
13
14 from IPython.nbconvert.tests.base import TestsBase
14 from IPython.nbconvert.tests.base import TestsBase
15 from .. import lexers
15 from .. import lexers
16
16
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Classes and functions
19 # Classes and functions
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 class TestLexers(TestsBase):
21 class TestLexers(TestsBase):
22 """Collection of lexers tests"""
22 """Collection of lexers tests"""
23 def setUp(self):
23 def setUp(self):
24 self.lexer = lexers.IPythonLexer()
24 self.lexer = lexers.IPythonLexer()
25
25
26 def testIPythonLexer(self):
26 def testIPythonLexer(self):
27 fragment = '!echo $HOME\n'
27 fragment = '!echo $HOME\n'
28 tokens = [
28 tokens = [
29 (Token.Operator, '!'),
29 (Token.Operator, '!'),
30 (Token.Name.Builtin, 'echo'),
30 (Token.Name.Builtin, 'echo'),
31 (Token.Text, ' '),
31 (Token.Text, ' '),
32 (Token.Name.Variable, '$HOME'),
32 (Token.Name.Variable, '$HOME'),
33 (Token.Text, '\n'),
33 (Token.Text, '\n'),
34 ]
34 ]
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
36
36
37 fragment_2 = 'x = ' + fragment
37 fragment_2 = 'x = ' + fragment
38 tokens_2 = [
38 tokens_2 = [
39 (Token.Name, 'x'),
39 (Token.Name, 'x'),
40 (Token.Text, ' '),
40 (Token.Text, ' '),
41 (Token.Operator, '='),
41 (Token.Operator, '='),
42 (Token.Text, ' '),
42 (Token.Text, ' '),
43 ] + tokens
43 ] + tokens
44 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
44 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
45
45
46 fragment_2 = 'x, = ' + fragment
46 fragment_2 = 'x, = ' + fragment
47 tokens_2 = [
47 tokens_2 = [
48 (Token.Name, 'x'),
48 (Token.Name, 'x'),
49 (Token.Punctuation, ','),
49 (Token.Punctuation, ','),
50 (Token.Text, ' '),
50 (Token.Text, ' '),
51 (Token.Operator, '='),
51 (Token.Operator, '='),
52 (Token.Text, ' '),
52 (Token.Text, ' '),
53 ] + tokens
53 ] + tokens
54 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
54 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
55
55
56 fragment_2 = 'x, = %sx ' + fragment[1:]
56 fragment_2 = 'x, = %sx ' + fragment[1:]
57 tokens_2 = [
57 tokens_2 = [
58 (Token.Name, 'x'),
58 (Token.Name, 'x'),
59 (Token.Punctuation, ','),
59 (Token.Punctuation, ','),
60 (Token.Text, ' '),
60 (Token.Text, ' '),
61 (Token.Operator, '='),
61 (Token.Operator, '='),
62 (Token.Text, ' '),
62 (Token.Text, ' '),
63 (Token.Operator, '%'),
63 (Token.Operator, '%'),
64 (Token.Keyword, 'sx'),
64 (Token.Keyword, 'sx'),
65 (Token.Text, ' '),
65 (Token.Text, ' '),
66 ] + tokens[1:]
66 ] + tokens[1:]
67 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
67 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
68
68
69 fragment_2 = 'f = %R function () {}\n'
69 fragment_2 = 'f = %R function () {}\n'
70 tokens_2 = [
70 tokens_2 = [
71 (Token.Name, 'f'),
71 (Token.Name, 'f'),
72 (Token.Text, ' '),
72 (Token.Text, ' '),
73 (Token.Operator, '='),
73 (Token.Operator, '='),
74 (Token.Text, ' '),
74 (Token.Text, ' '),
75 (Token.Operator, '%'),
75 (Token.Operator, '%'),
76 (Token.Keyword, 'R'),
76 (Token.Keyword, 'R'),
77 (Token.Text, ' function () {}\n'),
77 (Token.Text, ' function () {}\n'),
78 ]
78 ]
79 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
79 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
80
81 fragment_2 = '\t%%xyz\n$foo\n'
82 tokens_2 = [
83 (Token.Text, '\t'),
84 (Token.Operator, '%%'),
85 (Token.Keyword, 'xyz'),
86 (Token.Text, '\n$foo\n'),
87 ]
88 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
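
If you want to check the new rule outside the nbconvert test harness, the
added case can be reproduced as a standalone script. It mirrors the added
test exactly; the import path is the same assumption as above.

    # Standalone version of the new cell-magic test; import path assumed.
    from pygments.token import Token

    from IPython.nbconvert.utils.lexers import IPythonLexer

    lexer = IPythonLexer()
    fragment = '\t%%xyz\n$foo\n'
    expected = [
        (Token.Text, '\t'),
        (Token.Operator, '%%'),
        (Token.Keyword, 'xyz'),
        (Token.Text, '\n$foo\n'),
    ]
    assert expected == list(lexer.get_tokens(fragment))
    print('cell-magic rule OK')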