#7558: Added a rule for `?`
Lev Abalkin
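The two new token rules tag IPython's `?`/`??` help suffixes as Operator tokens. A minimal sketch of the effect, mirroring the test added below; the import path for the lexers module is an assumption, since the diff does not name the files:

    # Sketch only: adjust the import to wherever lexers.py lives in your
    # IPython checkout (the diff does not name the file).
    from IPython.nbconvert.utils import lexers

    lexer = lexers.IPythonLexer()
    print(list(lexer.get_tokens('%system?\n')))
    # Expected, per the test added in this commit:
    # [(Token.Operator, '%'), (Token.Keyword, 'system'),
    #  (Token.Operator, '?'), (Token.Text, '\n')]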
@@ -1,504 +1,506 @@
 # -*- coding: utf-8 -*-
 """
 Defines a variety of Pygments lexers for highlighting IPython code.
 
 This includes:
 
     IPythonLexer, IPython3Lexer
         Lexers for pure IPython (python + magic/shell commands)
 
     IPythonPartialTracebackLexer, IPythonTracebackLexer
         Supports 2.x and 3.x via keyword `python3`. The partial traceback
         lexer reads everything but the Python code appearing in a traceback.
         The full lexer combines the partial lexer with an IPython lexer.
 
     IPythonConsoleLexer
         A lexer for IPython console sessions, with support for tracebacks.
 
     IPyLexer
         A friendly lexer which examines the first line of text and from it,
         decides whether to use an IPython lexer or an IPython console lexer.
         This is probably the only lexer that needs to be explicitly added
         to Pygments.
 
 """
 #-----------------------------------------------------------------------------
 # Copyright (c) 2013, the IPython Development Team.
 #
 # Distributed under the terms of the Modified BSD License.
 #
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
 # Standard library
 import re
 
 # Third party
 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
 from pygments.lexer import (
     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
 )
 from pygments.token import (
     Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
 )
 from pygments.util import get_bool_opt
 
 # Local
 from IPython.testing.skipdoctest import skip_doctest
 
 line_re = re.compile('.*?\n')
 
 ipython_tokens = [
     (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
+    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
+    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
     (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                             using(BashLexer), Text)),
     (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
     (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
     (r'(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
 ]
 
 def build_ipy_lexer(python3):
     """Builds IPython lexers depending on the value of `python3`.
 
     The lexer inherits from an appropriate Python lexer and then adds
     information about IPython-specific keywords (i.e. magic commands,
     shell commands, etc.)
 
     Parameters
     ----------
     python3 : bool
         If `True`, then build an IPython lexer from a Python 3 lexer.
 
     """
     # It would be nice to have a single IPython lexer class which takes
     # a boolean `python3`. But since there are two Python lexer classes,
     # we will also have two IPython lexer classes.
     if python3:
         PyLexer = Python3Lexer
         clsname = 'IPython3Lexer'
         name = 'IPython3'
         aliases = ['ipython3']
         doc = """IPython3 Lexer"""
     else:
         PyLexer = PythonLexer
         clsname = 'IPythonLexer'
         name = 'IPython'
         aliases = ['ipython2', 'ipython']
         doc = """IPython Lexer"""
 
     tokens = PyLexer.tokens.copy()
     tokens['root'] = ipython_tokens + tokens['root']
 
     attrs = {'name': name, 'aliases': aliases,
              '__doc__': doc, 'tokens': tokens}
 
     return type(name, (PyLexer,), attrs)
 
 
 IPython3Lexer = build_ipy_lexer(python3=True)
 IPythonLexer = build_ipy_lexer(python3=False)
 
 
 class IPythonPartialTracebackLexer(RegexLexer):
     """
     Partial lexer for IPython tracebacks.
 
     Handles all the non-python output. This works for both Python 2.x and 3.x.
 
     """
     name = 'IPython Partial Traceback'
 
     tokens = {
         'root': [
             # Tracebacks for syntax errors have a different style.
             # For both types of tracebacks, we mark the first line with
             # Generic.Traceback. For syntax errors, we mark the filename
             # as we mark the filenames for non-syntax tracebacks.
             #
             # These two regexps define how IPythonConsoleLexer finds a
             # traceback.
             #
             ## Non-syntax traceback
             (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
             ## Syntax traceback
             (r'^(  File)(.*)(, line )(\d+\n)',
              bygroups(Generic.Traceback, Name.Namespace,
                       Generic.Traceback, Literal.Number.Integer)),
 
             # (Exception Identifier)(Whitespace)(Traceback Message)
             (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
              bygroups(Name.Exception, Generic.Whitespace, Text)),
             # (Module/Filename)(Text)(Callee)(Function Signature)
             # Better options for callee and function signature?
             (r'(.*)( in )(.*)(\(.*\)\n)',
              bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
             # Regular line: (Whitespace)(Line Number)(Python Code)
             (r'(\s*?)(\d+)(.*?\n)',
              bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
             # Emphasized line: (Arrow)(Line Number)(Python Code)
             # Using Exception token so arrow color matches the Exception.
             (r'(-*>?\s?)(\d+)(.*?\n)',
              bygroups(Name.Exception, Literal.Number.Integer, Other)),
             # (Exception Identifier)(Message)
             (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
              bygroups(Name.Exception, Text)),
             # Tag everything else as Other, will be handled later.
             (r'.*\n', Other),
         ],
     }
 
 
 class IPythonTracebackLexer(DelegatingLexer):
     """
     IPython traceback lexer.
 
     For doctests, the tracebacks can be snipped as much as desired with the
     exception of the lines that designate a traceback. For non-syntax error
     tracebacks, this is the line of hyphens. For syntax error tracebacks,
     this is the line which lists the File and line number.
 
     """
     # The lexer inherits from DelegatingLexer. The "root" lexer is an
     # appropriate IPython lexer, which depends on the value of the boolean
     # `python3`. First, we parse with the partial IPython traceback lexer.
     # Then, any code marked with the "Other" token is delegated to the root
     # lexer.
     #
     name = 'IPython Traceback'
     aliases = ['ipythontb']
 
     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3tb']
         else:
             self.aliases = ['ipython2tb', 'ipythontb']
 
         if self.python3:
             IPyLexer = IPython3Lexer
         else:
             IPyLexer = IPythonLexer
 
         DelegatingLexer.__init__(self, IPyLexer,
                                  IPythonPartialTracebackLexer, **options)
 
 @skip_doctest
 class IPythonConsoleLexer(Lexer):
     """
     An IPython console lexer for IPython code-blocks and doctests, such as:
 
     .. code-block:: rst
 
         .. code-block:: ipythonconsole
 
             In [1]: a = 'foo'
 
             In [2]: a
             Out[2]: 'foo'
 
             In [3]: print a
             foo
 
             In [4]: 1 / 0
 
 
     Support is also provided for IPython exceptions:
 
     .. code-block:: rst
 
         .. code-block:: ipythonconsole
 
             In [1]: raise Exception
 
             ---------------------------------------------------------------------------
             Exception                                 Traceback (most recent call last)
             <ipython-input-1-fca2ab0ca76b> in <module>()
             ----> 1 raise Exception
 
             Exception:
 
     """
     name = 'IPython console session'
     aliases = ['ipythonconsole']
     mimetypes = ['text/x-ipython-console']
 
     # The regexps used to determine what is input and what is output.
     # The default prompts for IPython are:
     #
     #     c.PromptManager.in_template  = 'In [\#]: '
     #     c.PromptManager.in2_template = '   .\D.: '
     #     c.PromptManager.out_template = 'Out[\#]: '
     #
     in1_regex = r'In \[[0-9]+\]: '
     in2_regex = r'   \.\.+\.: '
     out_regex = r'Out\[[0-9]+\]: '
 
     #: The regex to determine when a traceback starts.
     ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
 
     def __init__(self, **options):
         """Initialize the IPython console lexer.
 
         Parameters
         ----------
         python3 : bool
             If `True`, then the console inputs are parsed using a Python 3
             lexer. Otherwise, they are parsed using a Python 2 lexer.
         in1_regex : RegexObject
             The compiled regular expression used to detect the start
             of inputs. Although the IPython configuration setting may have a
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         in2_regex : RegexObject
             The compiled regular expression used to detect the continuation
             of inputs. Although the IPython configuration setting may have a
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         out_regex : RegexObject
             The compiled regular expression used to detect outputs. If `None`,
             then the default output prompt is assumed.
 
         """
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3console']
         else:
             self.aliases = ['ipython2console', 'ipythonconsole']
 
         in1_regex = options.get('in1_regex', self.in1_regex)
         in2_regex = options.get('in2_regex', self.in2_regex)
         out_regex = options.get('out_regex', self.out_regex)
 
         # So that we can work with input and output prompts which have been
         # rstrip'd (possibly by editors) we also need rstrip'd variants. If
         # we do not do this, then such prompts will be tagged as 'output'.
         # The reason we can't just use the rstrip'd variants instead is that
         # we want any whitespace associated with the prompt to be inserted
         # with the token. This allows formatted code to be modified so as to
         # hide the appearance of prompts, with the whitespace included. One
         # example use of this is in copybutton.js from the standard lib
         # Python docs.
         in1_regex_rstrip = in1_regex.rstrip() + '\n'
         in2_regex_rstrip = in2_regex.rstrip() + '\n'
         out_regex_rstrip = out_regex.rstrip() + '\n'
 
         # Compile and save them all.
         attrs = ['in1_regex', 'in2_regex', 'out_regex',
                  'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
         for attr in attrs:
             self.__setattr__(attr, re.compile(locals()[attr]))
 
         Lexer.__init__(self, **options)
 
         if self.python3:
             pylexer = IPython3Lexer
             tblexer = IPythonTracebackLexer
         else:
             pylexer = IPythonLexer
             tblexer = IPythonTracebackLexer
 
         self.pylexer = pylexer(**options)
         self.tblexer = tblexer(**options)
 
         self.reset()
 
     def reset(self):
         self.mode = 'output'
         self.index = 0
         self.buffer = u''
         self.insertions = []
 
     def buffered_tokens(self):
         """
         Generator of unprocessed tokens after doing insertions and before
         changing to a new state.
 
         """
         if self.mode == 'output':
             tokens = [(0, Generic.Output, self.buffer)]
         elif self.mode == 'input':
             tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
         else: # traceback
             tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
 
         for i, t, v in do_insertions(self.insertions, tokens):
             # All token indexes are relative to the buffer.
             yield self.index + i, t, v
 
         # Clear it all
         self.index += len(self.buffer)
         self.buffer = u''
         self.insertions = []
 
     def get_mci(self, line):
         """
         Parses the line and returns a 3-tuple: (mode, code, insertion).
 
         `mode` is the next mode (or state) of the lexer, and is always equal
         to 'input', 'output', or 'tb'.
 
         `code` is a portion of the line that should be added to the buffer
         corresponding to the next mode and eventually lexed by another lexer.
         For example, `code` could be Python code if `mode` were 'input'.
 
         `insertion` is a 3-tuple (index, token, text) representing an
         unprocessed "token" that will be inserted into the stream of tokens
         that are created from the buffer once we change modes. This is usually
         the input or output prompt.
 
         In general, the next mode depends on the current mode and on the
         contents of `line`.
 
         """
         # To reduce the number of regex match checks, we have multiple
         # 'if' blocks instead of 'if-elif' blocks.
 
         # Check for possible end of input
         in2_match = self.in2_regex.match(line)
         in2_match_rstrip = self.in2_regex_rstrip.match(line)
         if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
            in2_match_rstrip:
             end_input = True
         else:
             end_input = False
         if end_input and self.mode != 'tb':
             # Only look for an end of input when not in tb mode.
             # An ellipsis could appear within the traceback.
             mode = 'output'
             code = u''
             insertion = (0, Generic.Prompt, line)
             return mode, code, insertion
 
         # Check for output prompt
         out_match = self.out_regex.match(line)
         out_match_rstrip = self.out_regex_rstrip.match(line)
         if out_match or out_match_rstrip:
             mode = 'output'
             if out_match:
                 idx = out_match.end()
             else:
                 idx = out_match_rstrip.end()
             code = line[idx:]
             # Use the 'heading' token for output. We cannot use Generic.Error
             # since it would conflict with exceptions.
             insertion = (0, Generic.Heading, line[:idx])
             return mode, code, insertion
 
 
         # Check for input or continuation prompt (non stripped version)
         in1_match = self.in1_regex.match(line)
         if in1_match or (in2_match and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match:
                 idx = in1_match.end()
             else: # in2_match
                 idx = in2_match.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion
 
         # Check for input or continuation prompt (stripped version)
         in1_match_rstrip = self.in1_regex_rstrip.match(line)
         if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match_rstrip:
                 idx = in1_match_rstrip.end()
             else: # in2_match
                 idx = in2_match_rstrip.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion
 
         # Check for traceback
         if self.ipytb_start.match(line):
             mode = 'tb'
             code = line
             insertion = None
             return mode, code, insertion
 
         # All other stuff...
         if self.mode in ('input', 'output'):
             # We assume all other text is output. Multiline input that
             # does not use the continuation marker cannot be detected.
             # For example, the 3 in the following is clearly output:
             #
             #    In [1]: print 3
             #    3
             #
             # But the following second line is part of the input:
             #
             #    In [2]: while True:
             #        print True
             #
             # In both cases, the 2nd line will be 'output'.
             #
             mode = 'output'
         else:
             mode = 'tb'
 
         code = line
         insertion = None
 
         return mode, code, insertion
 
     def get_tokens_unprocessed(self, text):
         self.reset()
         for match in line_re.finditer(text):
             line = match.group()
             mode, code, insertion = self.get_mci(line)
 
             if mode != self.mode:
                 # Yield buffered tokens before transitioning to new mode.
                 for token in self.buffered_tokens():
                     yield token
                 self.mode = mode
 
             if insertion:
                 self.insertions.append((len(self.buffer), [insertion]))
             self.buffer += code
         else:
             for token in self.buffered_tokens():
                 yield token
 
 class IPyLexer(Lexer):
     """
     Primary lexer for all IPython-like code.
 
     This is a simple helper lexer. If the first line of the text begins with
     "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
     lexer. If not, then the entire text is parsed with an IPython lexer.
 
     The goal is to reduce the number of lexers that are registered
     with Pygments.
 
     """
     name = 'IPy session'
     aliases = ['ipy']
 
     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipy3']
         else:
             self.aliases = ['ipy2', 'ipy']
 
         Lexer.__init__(self, **options)
 
         self.IPythonLexer = IPythonLexer(**options)
         self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
 
     def get_tokens_unprocessed(self, text):
         # Search for the input prompt anywhere...this allows code blocks to
         # begin with comments as well.
         if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
             lex = self.IPythonConsoleLexer
         else:
             lex = self.IPythonLexer
         for token in lex.get_tokens_unprocessed(text):
             yield token
 
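As the module docstring notes, IPyLexer is probably the only lexer that needs to be registered with Pygments explicitly; for one-off highlighting it can also be driven directly through the standard Pygments API. A sketch, with the lexer import path assumed as above:

    # Sketch only: highlight and TerminalFormatter are standard Pygments
    # APIs; the lexer import path is an assumption.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from IPython.nbconvert.utils.lexers import IPyLexer

    session = "In [1]: a = 'foo'\nOut[1]: 'foo'\n"
    # The 'In [1]:' prompt makes IPyLexer delegate to the console lexer;
    # text without a prompt would go to the plain IPython lexer.
    print(highlight(session, IPyLexer(), TerminalFormatter()))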
@@ -1,94 +1,103 @@
1 """Test lexers module"""
1 """Test lexers module"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2014 The IPython Development Team
3 # Copyright (C) 2014 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 from pygments.token import Token
12 from pygments.token import Token
13
13
14 from IPython.nbconvert.tests.base import TestsBase
14 from IPython.nbconvert.tests.base import TestsBase
15 from .. import lexers
15 from .. import lexers
16
16
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Classes and functions
19 # Classes and functions
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 class TestLexers(TestsBase):
21 class TestLexers(TestsBase):
22 """Collection of lexers tests"""
22 """Collection of lexers tests"""
23 def setUp(self):
23 def setUp(self):
24 self.lexer = lexers.IPythonLexer()
24 self.lexer = lexers.IPythonLexer()
25
25
26 def testIPythonLexer(self):
26 def testIPythonLexer(self):
27 fragment = '!echo $HOME\n'
27 fragment = '!echo $HOME\n'
28 tokens = [
28 tokens = [
29 (Token.Operator, '!'),
29 (Token.Operator, '!'),
30 (Token.Name.Builtin, 'echo'),
30 (Token.Name.Builtin, 'echo'),
31 (Token.Text, ' '),
31 (Token.Text, ' '),
32 (Token.Name.Variable, '$HOME'),
32 (Token.Name.Variable, '$HOME'),
33 (Token.Text, '\n'),
33 (Token.Text, '\n'),
34 ]
34 ]
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
36
36
37 fragment_2 = '!' + fragment
37 fragment_2 = '!' + fragment
38 tokens_2 = [
38 tokens_2 = [
39 (Token.Operator, '!!'),
39 (Token.Operator, '!!'),
40 ] + tokens[1:]
40 ] + tokens[1:]
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
42
42
43 fragment_2 = 'x = ' + fragment
43 fragment_2 = 'x = ' + fragment
44 tokens_2 = [
44 tokens_2 = [
45 (Token.Name, 'x'),
45 (Token.Name, 'x'),
46 (Token.Text, ' '),
46 (Token.Text, ' '),
47 (Token.Operator, '='),
47 (Token.Operator, '='),
48 (Token.Text, ' '),
48 (Token.Text, ' '),
49 ] + tokens
49 ] + tokens
50 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
50 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
51
51
52 fragment_2 = 'x, = ' + fragment
52 fragment_2 = 'x, = ' + fragment
53 tokens_2 = [
53 tokens_2 = [
54 (Token.Name, 'x'),
54 (Token.Name, 'x'),
55 (Token.Punctuation, ','),
55 (Token.Punctuation, ','),
56 (Token.Text, ' '),
56 (Token.Text, ' '),
57 (Token.Operator, '='),
57 (Token.Operator, '='),
58 (Token.Text, ' '),
58 (Token.Text, ' '),
59 ] + tokens
59 ] + tokens
60 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
60 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
61
61
62 fragment_2 = 'x, = %sx ' + fragment[1:]
62 fragment_2 = 'x, = %sx ' + fragment[1:]
63 tokens_2 = [
63 tokens_2 = [
64 (Token.Name, 'x'),
64 (Token.Name, 'x'),
65 (Token.Punctuation, ','),
65 (Token.Punctuation, ','),
66 (Token.Text, ' '),
66 (Token.Text, ' '),
67 (Token.Operator, '='),
67 (Token.Operator, '='),
68 (Token.Text, ' '),
68 (Token.Text, ' '),
69 (Token.Operator, '%'),
69 (Token.Operator, '%'),
70 (Token.Keyword, 'sx'),
70 (Token.Keyword, 'sx'),
71 (Token.Text, ' '),
71 (Token.Text, ' '),
72 ] + tokens[1:]
72 ] + tokens[1:]
73 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
73 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
74
74
75 fragment_2 = 'f = %R function () {}\n'
75 fragment_2 = 'f = %R function () {}\n'
76 tokens_2 = [
76 tokens_2 = [
77 (Token.Name, 'f'),
77 (Token.Name, 'f'),
78 (Token.Text, ' '),
78 (Token.Text, ' '),
79 (Token.Operator, '='),
79 (Token.Operator, '='),
80 (Token.Text, ' '),
80 (Token.Text, ' '),
81 (Token.Operator, '%'),
81 (Token.Operator, '%'),
82 (Token.Keyword, 'R'),
82 (Token.Keyword, 'R'),
83 (Token.Text, ' function () {}\n'),
83 (Token.Text, ' function () {}\n'),
84 ]
84 ]
85 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
85 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
86
86
87 fragment_2 = '\t%%xyz\n$foo\n'
87 fragment_2 = '\t%%xyz\n$foo\n'
88 tokens_2 = [
88 tokens_2 = [
89 (Token.Text, '\t'),
89 (Token.Text, '\t'),
90 (Token.Operator, '%%'),
90 (Token.Operator, '%%'),
91 (Token.Keyword, 'xyz'),
91 (Token.Keyword, 'xyz'),
92 (Token.Text, '\n$foo\n'),
92 (Token.Text, '\n$foo\n'),
93 ]
93 ]
94 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
94 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
95
96 fragment_2 = '%system?\n'
97 tokens_2 = [
98 (Token.Operator, '%'),
99 (Token.Keyword, 'system'),
100 (Token.Operator, '?'),
101 (Token.Text, '\n'),
102 ]
103 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
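The added test covers the `%magic?` form; the second new rule handles a bare trailing `?`/`??` after an object name. It can be checked the same way; a sketch, with the expected token inferred from the regex rather than taken from this commit's tests, and the import path assumed as before:

    # Sketch only: the `\b(\?\??)(\s*)$` rule should emit the trailing
    # `??` as an Operator token. Import path is an assumption.
    from pygments.token import Token
    from IPython.nbconvert.utils import lexers

    tokens = list(lexers.IPythonLexer().get_tokens('a??\n'))
    assert (Token.Operator, '??') in tokens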