Add ipython_tokens for syntax highlighting following cell magic
Jörg Dietrich
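What the change does, in practice: with the new rules, the body of a recognized cell magic (e.g. %%javascript, %%latex, %%ruby) is delegated to the matching Pygments lexer instead of being treated as plain text. A minimal sketch of the effect, assuming the file under review is IPython/lib/lexers.py and Pygments is installed (the cell content is only an illustration):

# Sketch: highlight a %%javascript cell with the updated IPython lexer.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from IPython.lib.lexers import IPythonLexer

cell = "%%javascript\nconsole.log('now lexed as JavaScript');\n"
# The first line is tokenized as a cell magic; the body is handed to JavascriptLexer.
print(highlight(cell, IPythonLexer(), TerminalFormatter()))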
@@ -1,512 +1,531 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
-from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
+from pygments.lexers import (
+    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
+    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']

-ipython_tokens = [
-    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
-    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
-    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
-    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
-    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
-                                            using(BashLexer), Text)),
-    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
-    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
-    (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
-]

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

+    ipython_tokens = [
+        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
+        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
+        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
+        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
+        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
+        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
+        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
+        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
+        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
+        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
+        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
+        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
+        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
+                                                using(BashLexer), Text)),
+        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
+        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
+        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
+    ]
+
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception to the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in           = 'In [#]: '
    #     continuation = '   .D.: '
    #     template     = 'Out[#]: '
    #
    # Where '#' is the 'prompt number' or 'execution count' and 'D'
    # is a number of dots matching the width of the execution count
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example of this is in copybutton.js from the standard lib Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

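To check the delegation from the reviewer's side, one can inspect the raw token stream rather than rendered output. A small sketch, again assuming the module path IPython/lib/lexers.py; the exact token types emitted depend on the Pygments version:

# Sketch: confirm that a %%latex cell body yields TeX tokens, not plain Text.
from IPython.lib.lexers import IPython3Lexer

code = "%%latex\n\\begin{equation}x^2\\end{equation}\n"
for token_type, value in IPython3Lexer().get_tokens(code):
    print(token_type, repr(value))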