##// END OF EJS Templates
#7558: Simplified handling of the ! escape.
Lev Abalkin -
Show More
@@ -1,507 +1,504
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Defines a variety of Pygments lexers for highlighting IPython code.
3 Defines a variety of Pygments lexers for highlighting IPython code.
4
4
5 This includes:
5 This includes:
6
6
7 IPythonLexer, IPython3Lexer
7 IPythonLexer, IPython3Lexer
8 Lexers for pure IPython (python + magic/shell commands)
8 Lexers for pure IPython (python + magic/shell commands)
9
9
10 IPythonPartialTracebackLexer, IPythonTracebackLexer
10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 lexer reads everything but the Python code appearing in a traceback.
12 lexer reads everything but the Python code appearing in a traceback.
13 The full lexer combines the partial lexer with an IPython lexer.
13 The full lexer combines the partial lexer with an IPython lexer.
14
14
15 IPythonConsoleLexer
15 IPythonConsoleLexer
16 A lexer for IPython console sessions, with support for tracebacks.
16 A lexer for IPython console sessions, with support for tracebacks.
17
17
18 IPyLexer
18 IPyLexer
19 A friendly lexer which examines the first line of text and from it,
19 A friendly lexer which examines the first line of text and from it,
20 decides whether to use an IPython lexer or an IPython console lexer.
20 decides whether to use an IPython lexer or an IPython console lexer.
21 This is probably the only lexer that needs to be explicitly added
21 This is probably the only lexer that needs to be explicitly added
22 to Pygments.
22 to Pygments.
23
23
24 """
24 """
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Copyright (c) 2013, the IPython Development Team.
26 # Copyright (c) 2013, the IPython Development Team.
27 #
27 #
28 # Distributed under the terms of the Modified BSD License.
28 # Distributed under the terms of the Modified BSD License.
29 #
29 #
30 # The full license is in the file COPYING.txt, distributed with this software.
30 # The full license is in the file COPYING.txt, distributed with this software.
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32
32
33 # Standard library
33 # Standard library
34 import re
34 import re
35
35
36 # Third party
36 # Third party
37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
38 from pygments.lexer import (
38 from pygments.lexer import (
39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
40 )
40 )
41 from pygments.token import (
41 from pygments.token import (
42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
43 )
43 )
44 from pygments.util import get_bool_opt
44 from pygments.util import get_bool_opt
45
45
46 # Local
46 # Local
47 from IPython.testing.skipdoctest import skip_doctest
47 from IPython.testing.skipdoctest import skip_doctest
48
48
# Matches a single line of text, up to and including its trailing newline.
line_re = re.compile('.*?\n')

# IPython-specific rules.  build_ipy_lexer() places them in front of the
# 'root' rules of the underlying Python lexer, so they are tried first.
ipython_tokens = [
    # Cell magic: '%%name' followed by the rest of the text ((?s) lets '.'
    # span newlines), which is emitted as plain Text.
    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
    # Line magics that run a shell command: the argument is lexed as Bash.
    (r'(%)(sx|sc|system)(.*)(\n)',
     bygroups(Operator, Keyword, using(BashLexer), Text)),
    # Any other line magic.
    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
    # '!!command' at the start of a line.
    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    # '!command' anywhere on a line (this also covers 'var = !command').
    # The (?!=) lookahead keeps the '!=' comparison operator out of this
    # rule so that it is still handled by the Python lexer.
    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
]
62
59
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.
        Otherwise, build it from `PythonLexer`.

    Returns
    -------
    type
        A new subclass of the selected Python lexer whose 'root' state
        starts with the rules in `ipython_tokens`.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`.  But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    # Copy the state table so that replacing 'root' below does not touch the
    # table of the parent lexer; the concatenation builds a new list.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases,
             '__doc__': doc, 'tokens': tokens}

    # Use the class name (not the display name) so that the __name__ of the
    # generated class matches the module-level name it is bound to.
    return type(clsname, (PyLexer,), attrs)
99
96
100
97
# The two concrete IPython lexer classes: one built on PythonLexer and one
# built on Python3Lexer.
IPythonLexer = build_ipy_lexer(python3=False)
IPython3Lexer = build_ipy_lexer(python3=True)
103
100
104
101
class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Tokenizes the non-Python parts of a traceback (separator line, file
    names, line numbers, exception names) and tags everything else as
    `Other` so that another lexer can process it afterwards.  The rules do
    not depend on the Python version.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # The first two rules recognize the line that opens a traceback;
            # IPythonConsoleLexer uses the same two patterns to detect where
            # a traceback starts.  Syntax-error tracebacks look different
            # from the others, but in both cases the opening line is marked
            # with Generic.Traceback, and file names are marked the same way
            # in both styles.

            # Opening line of a non-syntax traceback: a row of hyphens,
            # optionally preceded by '^C'.
            (r'^(\^C)?(-+\n)',
             bygroups(Error, Generic.Traceback)),

            # Opening line of a syntax-error traceback.
            # NOTE(review): the literal whitespace inside '( File)' is
            # significant -- confirm it against real traceback output.
            (r'^( File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),

            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),

            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),

            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # The arrow gets the Exception token so that its color matches
            # the color of the exception name.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),

            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),

            # Anything else is tagged as Other and handled later.
            (r'.*\n', Other),
        ],
    }
152
149
153
150
class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired, with the
    exception of the lines that designate a traceback.  For non-syntax error
    tracebacks, this is the line of hyphens.  For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # This is a DelegatingLexer.  The text is first parsed with the partial
    # IPython traceback lexer; whatever that lexer tags with the "Other"
    # token is then handed to the "root" lexer, which is the IPython lexer
    # selected by the boolean option `python3`.
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        """Select the root IPython lexer according to the `python3` option."""
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
            root_lexer = IPython3Lexer
        else:
            self.aliases = ['ipython2tb', 'ipythontb']
            root_lexer = IPythonLexer

        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)
187
184
@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = ' .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    # NOTE(review): `in1_regex` matches a prompt of at least 8 characters
    # while `in2_regex` needs only 6; the leading whitespace of the
    # continuation pattern may have been collapsed in this copy of the
    # file -- confirm against the upstream source.
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r' \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression pattern used to detect the start of
            inputs.  It must be a pattern string, not a compiled object: a
            variant with trailing whitespace stripped is derived from it and
            both are compiled here.  If missing or `None`, then the default
            input prompt is assumed.
        in2_regex : str
            The regular expression pattern used to detect the continuation
            of inputs, handled like `in1_regex`.  If missing or `None`, then
            the default continuation prompt is assumed.
        out_regex : str
            The regular expression pattern used to detect outputs, handled
            like `in1_regex`.  If missing or `None`, then the default output
            prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        patterns = {}
        for key in ('in1_regex', 'in2_regex', 'out_regex'):
            pattern = options.get(key)
            if pattern is None:
                # Nothing has been set on the instance yet, so this reads
                # the class-level default pattern string.
                pattern = getattr(self, key)
            patterns[key] = pattern
            patterns[key + '_rstrip'] = pattern.rstrip() + '\n'

        # Compile and save them all; the compiled objects replace the
        # class-level pattern strings on this instance.
        for attr, pattern in patterns.items():
            setattr(self, attr, re.compile(pattern))

        Lexer.__init__(self, **options)

        # Only the lexer for the inputs depends on `python3` here; the
        # traceback lexer reads the option itself.
        pylexer = IPython3Lexer if self.python3 else IPythonLexer
        self.pylexer = pylexer(**options)
        self.tblexer = IPythonTracebackLexer(**options)

        self.reset()

    def reset(self):
        """Return to the initial state: 'output' mode with nothing buffered."""
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        The buffer is lexed according to the current mode, the pending
        insertions (the prompts) are merged in, and afterwards the buffer
        and the insertions are cleared.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else:  # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.  It is `None` when the line carries no
        prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input: a continuation prompt with
        # nothing after it.
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        end_input = bool(
            (in2_match and in2_match.group().rstrip() == line.rstrip())
            or in2_match_rstrip
        )
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            return 'output', u'', (0, Generic.Prompt, line)

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            idx = (out_match or out_match_rstrip).end()
            # Use the 'heading' token for output.  We cannot use Generic.Error
            # since it would conflict with exceptions.
            return 'output', line[idx:], (0, Generic.Heading, line[:idx])

        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            idx = (in1_match or in2_match).end()
            return 'input', line[idx:], (0, Generic.Prompt, line[:idx])

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # Same reasoning as for the non stripped prompts above.
            idx = (in1_match_rstrip or in2_match_rstrip).end()
            return 'input', line[idx:], (0, Generic.Prompt, line[:idx])

        # Check for traceback
        if self.ipytb_start.match(line):
            return 'tb', line, None

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #     In [1]: print 3
            #     3
            #
            # But the following second line is part of the input:
            #
            #     In [2]: while True:
            #         print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        return mode, line, None

    def get_tokens_unprocessed(self, text):
        """Yield (index, token, value) tuples for `text`, line by line.

        Consecutive lines of the same mode are collected in a buffer, which
        is lexed and flushed whenever the mode changes and once more at the
        end of the text.

        """
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        # Flush whatever is still buffered after the last line.
        for token in self.buffered_tokens():
            yield token
470
467
class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer.  If an input prompt of the form
    "In \[[0-9]+\]:" appears anywhere in the text, then the entire text is
    parsed with an IPython console lexer.  If not, then the entire text is
    parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        """Create the two lexers this helper chooses between.

        Parameters
        ----------
        python3 : bool
            If `True`, then plain code is parsed with the Python 3 based
            IPython lexer.  Otherwise, the Python 2 based one is used.  The
            option is also passed on to the console lexer.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
            plain_lexer = IPython3Lexer
        else:
            self.aliases = ['ipy2', 'ipy']
            plain_lexer = IPythonLexer

        Lexer.__init__(self, **options)

        # The attribute keeps its historical name, but it holds the lexer
        # that matches `python3`, consistent with the console and traceback
        # lexers.
        self.IPythonLexer = plain_lexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        """Delegate `text` to the console lexer or to the plain lexer."""
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
507
504
@@ -1,88 +1,94
1 """Test lexers module"""
1 """Test lexers module"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2014 The IPython Development Team
3 # Copyright (C) 2014 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 from pygments.token import Token
12 from pygments.token import Token
13
13
14 from IPython.nbconvert.tests.base import TestsBase
14 from IPython.nbconvert.tests.base import TestsBase
15 from .. import lexers
15 from .. import lexers
16
16
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Classes and functions
19 # Classes and functions
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 class TestLexers(TestsBase):
21 class TestLexers(TestsBase):
22 """Collection of lexers tests"""
22 """Collection of lexers tests"""
23 def setUp(self):
23 def setUp(self):
24 self.lexer = lexers.IPythonLexer()
24 self.lexer = lexers.IPythonLexer()
25
25
26 def testIPythonLexer(self):
26 def testIPythonLexer(self):
27 fragment = '!echo $HOME\n'
27 fragment = '!echo $HOME\n'
28 tokens = [
28 tokens = [
29 (Token.Operator, '!'),
29 (Token.Operator, '!'),
30 (Token.Name.Builtin, 'echo'),
30 (Token.Name.Builtin, 'echo'),
31 (Token.Text, ' '),
31 (Token.Text, ' '),
32 (Token.Name.Variable, '$HOME'),
32 (Token.Name.Variable, '$HOME'),
33 (Token.Text, '\n'),
33 (Token.Text, '\n'),
34 ]
34 ]
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
36
36
37 fragment_2 = '!' + fragment
38 tokens_2 = [
39 (Token.Operator, '!!'),
40 ] + tokens[1:]
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
42
37 fragment_2 = 'x = ' + fragment
43 fragment_2 = 'x = ' + fragment
38 tokens_2 = [
44 tokens_2 = [
39 (Token.Name, 'x'),
45 (Token.Name, 'x'),
40 (Token.Text, ' '),
46 (Token.Text, ' '),
41 (Token.Operator, '='),
47 (Token.Operator, '='),
42 (Token.Text, ' '),
48 (Token.Text, ' '),
43 ] + tokens
49 ] + tokens
44 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
50 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
45
51
46 fragment_2 = 'x, = ' + fragment
52 fragment_2 = 'x, = ' + fragment
47 tokens_2 = [
53 tokens_2 = [
48 (Token.Name, 'x'),
54 (Token.Name, 'x'),
49 (Token.Punctuation, ','),
55 (Token.Punctuation, ','),
50 (Token.Text, ' '),
56 (Token.Text, ' '),
51 (Token.Operator, '='),
57 (Token.Operator, '='),
52 (Token.Text, ' '),
58 (Token.Text, ' '),
53 ] + tokens
59 ] + tokens
54 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
60 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
55
61
56 fragment_2 = 'x, = %sx ' + fragment[1:]
62 fragment_2 = 'x, = %sx ' + fragment[1:]
57 tokens_2 = [
63 tokens_2 = [
58 (Token.Name, 'x'),
64 (Token.Name, 'x'),
59 (Token.Punctuation, ','),
65 (Token.Punctuation, ','),
60 (Token.Text, ' '),
66 (Token.Text, ' '),
61 (Token.Operator, '='),
67 (Token.Operator, '='),
62 (Token.Text, ' '),
68 (Token.Text, ' '),
63 (Token.Operator, '%'),
69 (Token.Operator, '%'),
64 (Token.Keyword, 'sx'),
70 (Token.Keyword, 'sx'),
65 (Token.Text, ' '),
71 (Token.Text, ' '),
66 ] + tokens[1:]
72 ] + tokens[1:]
67 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
73 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
68
74
69 fragment_2 = 'f = %R function () {}\n'
75 fragment_2 = 'f = %R function () {}\n'
70 tokens_2 = [
76 tokens_2 = [
71 (Token.Name, 'f'),
77 (Token.Name, 'f'),
72 (Token.Text, ' '),
78 (Token.Text, ' '),
73 (Token.Operator, '='),
79 (Token.Operator, '='),
74 (Token.Text, ' '),
80 (Token.Text, ' '),
75 (Token.Operator, '%'),
81 (Token.Operator, '%'),
76 (Token.Keyword, 'R'),
82 (Token.Keyword, 'R'),
77 (Token.Text, ' function () {}\n'),
83 (Token.Text, ' function () {}\n'),
78 ]
84 ]
79 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
85 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
80
86
81 fragment_2 = '\t%%xyz\n$foo\n'
87 fragment_2 = '\t%%xyz\n$foo\n'
82 tokens_2 = [
88 tokens_2 = [
83 (Token.Text, '\t'),
89 (Token.Text, '\t'),
84 (Token.Operator, '%%'),
90 (Token.Operator, '%%'),
85 (Token.Keyword, 'xyz'),
91 (Token.Keyword, 'xyz'),
86 (Token.Text, '\n$foo\n'),
92 (Token.Text, '\n$foo\n'),
87 ]
93 ]
88 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
94 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
General Comments 0
You need to be logged in to leave comments. Login now