#7548: Fixed handling of != operator. Added tests.
Lev Abalkin
@@ -1,508 +1,508 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local
from IPython.testing.skipdoctest import skip_doctest

line_re = re.compile('.*?\n')

ipython_tokens = [
    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                            using(BashLexer), Text)),
    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-    (r'((?!=)!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
]
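
A quick sanity check of the change, runnable outside Pygments. The old rule
applied the negative lookahead before consuming the '!', which only asserts
that '!' itself is not '=' (always true), so 'x != y' was still mis-lexed as a
shell escape; the new rule asserts that the character after the '!' is not '=':

    import re

    old = re.compile(r'((?!=)!)(.+)(\n)')  # lookahead runs before the '!'
    new = re.compile(r'(!)(?!=)(.+)(\n)')  # lookahead runs after the '!'

    print(old.match('!= y\n'))    # a match: '= y' was handed to the Bash lexer
    print(new.match('!= y\n'))    # None: '!=' falls through to the Python lexer
    print(new.match('!ls -l\n'))  # still a match: real shell escapes are kept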

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
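
The two generated classes are ordinary Pygments lexers, so they can be driven
with Pygments' public API; a minimal sketch, assuming the classes above are in
scope (e.g. imported from this module):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    code = u'!echo $HOME\nx != y\n'
    # The first line lexes as a shell escape; with the fix above, the second
    # line lexes as plain Python instead of a '!' shell command.
    print(highlight(code, IPythonLexer(), TerminalFormatter()))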


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            for token in self.buffered_tokens():
                yield token

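To eyeball the console lexer end to end, a minimal sketch (import path assumed
as above; the session text is taken from the class docstring):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from IPython.nbconvert.utils.lexers import IPythonConsoleLexer  # assumed path

    session = (u"In [1]: a = 'foo'\n"
               u'\n'
               u'In [2]: a\n'
               u"Out[2]: 'foo'\n")

    print(highlight(session, IPythonConsoleLexer(python3=False),
                    TerminalFormatter()))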
class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

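The dispatch in IPyLexer.get_tokens_unprocessed is easy to confirm; a sketch
with the same assumed import path:

    from IPython.nbconvert.utils.lexers import IPyLexer  # assumed path

    lexer = IPyLexer()
    # Text containing an input prompt is routed to the console lexer, so the
    # first token is the prompt itself...
    print(list(lexer.get_tokens(u'In [1]: x = 1\n'))[0])
    # ...while prompt-free text is routed to the plain IPython lexer.
    print(list(lexer.get_tokens(u'x = 1\n'))[0])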
@@ -1,111 +1,131 @@
1 """Test lexers module"""
1 """Test lexers module"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2014 The IPython Development Team
3 # Copyright (C) 2014 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 from pygments.token import Token
12 from pygments.token import Token
13
13
14 from IPython.nbconvert.tests.base import TestsBase
14 from IPython.nbconvert.tests.base import TestsBase
15 from .. import lexers
15 from .. import lexers
16
16
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Classes and functions
19 # Classes and functions
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 class TestLexers(TestsBase):
21 class TestLexers(TestsBase):
22 """Collection of lexers tests"""
22 """Collection of lexers tests"""
23 def setUp(self):
23 def setUp(self):
24 self.lexer = lexers.IPythonLexer()
24 self.lexer = lexers.IPythonLexer()
25
25
26 def testIPythonLexer(self):
26 def testIPythonLexer(self):
27 fragment = '!echo $HOME\n'
27 fragment = '!echo $HOME\n'
28 tokens = [
28 tokens = [
29 (Token.Operator, '!'),
29 (Token.Operator, '!'),
30 (Token.Name.Builtin, 'echo'),
30 (Token.Name.Builtin, 'echo'),
31 (Token.Text, ' '),
31 (Token.Text, ' '),
32 (Token.Name.Variable, '$HOME'),
32 (Token.Name.Variable, '$HOME'),
33 (Token.Text, '\n'),
33 (Token.Text, '\n'),
34 ]
34 ]
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
36
36
37 fragment_2 = '!' + fragment
37 fragment_2 = '!' + fragment
38 tokens_2 = [
38 tokens_2 = [
39 (Token.Operator, '!!'),
39 (Token.Operator, '!!'),
40 ] + tokens[1:]
40 ] + tokens[1:]
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
42
42
43 fragment_2 = '\t %%!\n' + fragment[1:]
43 fragment_2 = '\t %%!\n' + fragment[1:]
44 tokens_2 = [
44 tokens_2 = [
45 (Token.Text, '\t '),
45 (Token.Text, '\t '),
46 (Token.Operator, '%%!'),
46 (Token.Operator, '%%!'),
47 (Token.Text, '\n'),
47 (Token.Text, '\n'),
48 ] + tokens[1:]
48 ] + tokens[1:]
49 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
49 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
50
50
51 fragment_2 = 'x = ' + fragment
51 fragment_2 = 'x = ' + fragment
52 tokens_2 = [
52 tokens_2 = [
53 (Token.Name, 'x'),
53 (Token.Name, 'x'),
54 (Token.Text, ' '),
54 (Token.Text, ' '),
55 (Token.Operator, '='),
55 (Token.Operator, '='),
56 (Token.Text, ' '),
56 (Token.Text, ' '),
57 ] + tokens
57 ] + tokens
58 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
58 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
59
59
60 fragment_2 = 'x, = ' + fragment
60 fragment_2 = 'x, = ' + fragment
61 tokens_2 = [
61 tokens_2 = [
62 (Token.Name, 'x'),
62 (Token.Name, 'x'),
63 (Token.Punctuation, ','),
63 (Token.Punctuation, ','),
64 (Token.Text, ' '),
64 (Token.Text, ' '),
65 (Token.Operator, '='),
65 (Token.Operator, '='),
66 (Token.Text, ' '),
66 (Token.Text, ' '),
67 ] + tokens
67 ] + tokens
68 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
68 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
69
69
70 fragment_2 = 'x, = %sx ' + fragment[1:]
70 fragment_2 = 'x, = %sx ' + fragment[1:]
71 tokens_2 = [
71 tokens_2 = [
72 (Token.Name, 'x'),
72 (Token.Name, 'x'),
73 (Token.Punctuation, ','),
73 (Token.Punctuation, ','),
74 (Token.Text, ' '),
74 (Token.Text, ' '),
75 (Token.Operator, '='),
75 (Token.Operator, '='),
76 (Token.Text, ' '),
76 (Token.Text, ' '),
77 (Token.Operator, '%'),
77 (Token.Operator, '%'),
78 (Token.Keyword, 'sx'),
78 (Token.Keyword, 'sx'),
79 (Token.Text, ' '),
79 (Token.Text, ' '),
80 ] + tokens[1:]
80 ] + tokens[1:]
81 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
81 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
82
82
83 fragment_2 = 'f = %R function () {}\n'
83 fragment_2 = 'f = %R function () {}\n'
84 tokens_2 = [
84 tokens_2 = [
85 (Token.Name, 'f'),
85 (Token.Name, 'f'),
86 (Token.Text, ' '),
86 (Token.Text, ' '),
87 (Token.Operator, '='),
87 (Token.Operator, '='),
88 (Token.Text, ' '),
88 (Token.Text, ' '),
89 (Token.Operator, '%'),
89 (Token.Operator, '%'),
90 (Token.Keyword, 'R'),
90 (Token.Keyword, 'R'),
91 (Token.Text, ' function () {}\n'),
91 (Token.Text, ' function () {}\n'),
92 ]
92 ]
93 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
93 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
94
94
95 fragment_2 = '\t%%xyz\n$foo\n'
95 fragment_2 = '\t%%xyz\n$foo\n'
96 tokens_2 = [
96 tokens_2 = [
97 (Token.Text, '\t'),
97 (Token.Text, '\t'),
98 (Token.Operator, '%%'),
98 (Token.Operator, '%%'),
99 (Token.Keyword, 'xyz'),
99 (Token.Keyword, 'xyz'),
100 (Token.Text, '\n$foo\n'),
100 (Token.Text, '\n$foo\n'),
101 ]
101 ]
102 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
102 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
103
103
104 fragment_2 = '%system?\n'
104 fragment_2 = '%system?\n'
105 tokens_2 = [
105 tokens_2 = [
106 (Token.Operator, '%'),
106 (Token.Operator, '%'),
107 (Token.Keyword, 'system'),
107 (Token.Keyword, 'system'),
108 (Token.Operator, '?'),
108 (Token.Operator, '?'),
109 (Token.Text, '\n'),
109 (Token.Text, '\n'),
110 ]
110 ]
111 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
111 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
+
+        fragment_2 = 'x != y\n'
+        tokens_2 = [
+            (Token.Name, 'x'),
+            (Token.Text, ' '),
+            (Token.Operator, '!='),
+            (Token.Text, ' '),
+            (Token.Name, 'y'),
+            (Token.Text, '\n'),
+        ]
+        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
+
+        fragment_2 = ' ?math.sin\n'
+        tokens_2 = [
+            (Token.Text, ' '),
+            (Token.Operator, '?'),
+            (Token.Text, 'math.sin'),
+            (Token.Text, '\n'),
+        ]
+        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
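
The two new cases can also be checked interactively; a sketch (import path
assumed, matching the relative import above):

    from IPython.nbconvert.utils import lexers  # assumed path

    for token, text in lexers.IPythonLexer().get_tokens(u'x != y\n'):
        print('%s %r' % (token, text))
    # With the fix, '!=' comes back as a single Token.Operator rather than a
    # '!' shell escape followed by Bash tokens.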