support 'foo*??' in lexer...
Min RK
@@ -1,510 +1,511 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']
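
# --- Editor's example (not part of the commit): rendering a session with
# IPyLexer, the one lexer the docstring above suggests registering with
# Pygments. The import path is an assumption based on where this module
# lives in IPython.
from pygments import highlight
from pygments.formatters import HtmlFormatter
from IPython.lib.lexers import IPyLexer

html = highlight("In [1]: a = 'foo'\nOut[1]: 'foo'\n", IPyLexer(), HtmlFormatter())
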
ipython_tokens = [
    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                            using(BashLexer), Text)),
    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
+   (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
]
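
# --- Editor's example (not part of the commit): the last two rules pair up
# leading and trailing help queries. The import path is an assumption, and
# the expected tokens follow from the regex groups above.
from pygments.token import Token
from IPython.lib.lexers import IPythonLexer

lexer = IPythonLexer()
# Leading '?' (existing rule): '?' is the Operator, 'foo*' is Text.
assert (Token.Operator, '?') in lexer.get_tokens('?foo*\n')
# Trailing '??' (the rule added here): 'foo*' is Text, '??' the Operator.
assert (Token.Operator, '??') in lexer.get_tokens('foo*??\n')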

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
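
# --- Editor's note (not part of the commit): the classes produced by type()
# above carry the attributes passed in `attrs`; run in this module's
# namespace:
assert IPython3Lexer.name == 'IPython3'
assert IPythonLexer.aliases == ['ipython2', 'ipython']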


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
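
# --- Editor's example (not part of the commit): the first rule tags the
# divider line that opens a non-syntax traceback; run in this module's
# namespace:
toks = list(IPythonPartialTracebackLexer().get_tokens_unprocessed('-' * 75 + '\n'))
assert toks[0][1] is Generic.Traceback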


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)
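
# --- Editor's example (not part of the commit): the `python3` option picks
# the root lexer and narrows the advertised aliases; run in this module's
# namespace:
assert IPythonTracebackLexer(python3=True).aliases == ['ipython3tb']
assert IPythonTracebackLexer().aliases == ['ipython2tb', 'ipythontb']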

class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression used to detect the start of inputs.
            Although the IPython configuration setting may have trailing
            whitespace, do not include it in the regex. If `None`, then the
            default input prompt is assumed.
        in2_regex : str
            The regular expression used to detect the continuation of inputs.
            Although the IPython configuration setting may have trailing
            whitespace, do not include it in the regex. If `None`, then the
            default continuation prompt is assumed.
        out_regex : str
            The regular expression used to detect outputs. If `None`, then
            the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()
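
    # --- Editor's note (not part of the commit): because __init__ rstrips
    # and re.compile()s the options itself, in1_regex/in2_regex/out_regex
    # must be passed as regex *strings*, e.g. for a made-up 'In> ' prompt:
    #
    #   >>> lexer = IPythonConsoleLexer(in1_regex=r'In> ')
    #   >>> bool(lexer.in1_regex.match('In> x = 1'))
    #   True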

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []
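
    # --- Editor's note (not part of the commit): `self.index` accumulates
    # the length of every buffer flushed above, so the yielded indexes stay
    # absolute offsets into the whole document rather than offsets into the
    # current buffer.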

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion
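
    # --- Editor's note (not part of the commit): for a freshly reset lexer,
    # a plain input line splits like this:
    #
    #   >>> IPythonConsoleLexer().get_mci('In [1]: a = 1\n')
    #   ('input', 'a = 1\n', (0, Generic.Prompt, 'In [1]: '))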

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            for token in self.buffered_tokens():
                yield token
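
# --- Editor's example (not part of the commit): prompts come back as
# Generic.Prompt / Generic.Heading tokens wrapped around the lexed code;
# run in this module's namespace:
session = "In [1]: a = 'foo'\nOut[1]: 'foo'\n"
toks = list(IPythonConsoleLexer().get_tokens(session))
assert (Generic.Prompt, 'In [1]: ') in toks
assert (Generic.Heading, 'Out[1]: ') in toks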

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
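
# --- Editor's example (not part of the commit): IPyLexer dispatch. Text
# containing an 'In [N]:' prompt anywhere is routed to the console lexer;
# anything else goes to the pure IPython lexer. Run in this module's
# namespace:
toks = list(IPyLexer().get_tokens("# comment\nIn [1]: a = 'foo'\n"))
assert (Generic.Prompt, 'In [1]: ') in toks
toks = list(IPyLexer().get_tokens("%timeit a = 1\n"))
assert (Operator, '%') in toks and (Keyword, 'timeit') in toks
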
@@ -1,122 +1,130 @@
1 """Test lexers module"""
1 """Test lexers module"""
2
2
3 # Copyright (c) IPython Development Team.
3 # Copyright (c) IPython Development Team.
4 # Distributed under the terms of the Modified BSD License.
4 # Distributed under the terms of the Modified BSD License.
5
5
6 from unittest import TestCase
6 from unittest import TestCase
7 from pygments.token import Token
7 from pygments.token import Token
8
8
9 from .. import lexers
9 from .. import lexers
10
10
11
11
12 class TestLexers(TestCase):
12 class TestLexers(TestCase):
13 """Collection of lexers tests"""
13 """Collection of lexers tests"""
14 def setUp(self):
14 def setUp(self):
15 self.lexer = lexers.IPythonLexer()
15 self.lexer = lexers.IPythonLexer()

    def testIPythonLexer(self):
        fragment = '!echo $HOME\n'
        tokens = [
            (Token.Operator, '!'),
            (Token.Name.Builtin, 'echo'),
            (Token.Text, ' '),
            (Token.Name.Variable, '$HOME'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

        fragment_2 = '!' + fragment
        tokens_2 = [
            (Token.Operator, '!!'),
        ] + tokens[1:]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = '\t %%!\n' + fragment[1:]
        tokens_2 = [
            (Token.Text, '\t '),
            (Token.Operator, '%%!'),
            (Token.Text, '\n'),
        ] + tokens[1:]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x = ' + fragment
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
        ] + tokens
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x, = ' + fragment
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Punctuation, ','),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
        ] + tokens
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x, = %sx ' + fragment[1:]
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Punctuation, ','),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
            (Token.Operator, '%'),
            (Token.Keyword, 'sx'),
            (Token.Text, ' '),
        ] + tokens[1:]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'f = %R function () {}\n'
        tokens_2 = [
            (Token.Name, 'f'),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
            (Token.Operator, '%'),
            (Token.Keyword, 'R'),
            (Token.Text, ' function () {}\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = '\t%%xyz\n$foo\n'
        tokens_2 = [
            (Token.Text, '\t'),
            (Token.Operator, '%%'),
            (Token.Keyword, 'xyz'),
            (Token.Text, '\n$foo\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = '%system?\n'
        tokens_2 = [
            (Token.Operator, '%'),
            (Token.Keyword, 'system'),
            (Token.Operator, '?'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x != y\n'
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Text, ' '),
            (Token.Operator, '!='),
            (Token.Text, ' '),
            (Token.Name, 'y'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = ' ?math.sin\n'
        tokens_2 = [
            (Token.Text, ' '),
            (Token.Operator, '?'),
            (Token.Text, 'math.sin'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
+
+        fragment = ' *int*?\n'
+        tokens = [
+            (Token.Text, ' *int*'),
+            (Token.Operator, '?'),
+            (Token.Text, '\n'),
+        ]
+        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
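
    # --- Editor's sketch (not part of the commit): the commit title's own
    # example, 'foo*??', exercises the same new rule. The method name is
    # hypothetical and the expected tokens are derived from the regex groups,
    # in the style of the test above.
    def testIPythonLexerTrailingHelp(self):
        fragment = 'foo*??\n'
        tokens = [
            (Token.Text, 'foo*'),
            (Token.Operator, '??'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))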