Same highlighting for %%file as for %%writefile...
Matthias Geier
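For illustration, the effect of this change: a "%%file" cell magic now gets the same treatment as "%%writefile", so the cell body is highlighted as Python instead of plain text. A minimal sketch (the `lexers` import path is hypothetical; any module exposing the IPythonLexer defined in this diff works):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    import lexers  # hypothetical import path for the module changed below

    cell = "%%file hello.py\nprint('hello')\n"
    # The %%file marker is tokenized as an Operator and the cell body is
    # lexed as Python, exactly as the existing %%writefile rule does.
    print(highlight(cell, lexers.IPythonLexer(), TerminalFormatter()))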
@@ -1,531 +1,532 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

IPythonLexer, IPython3Lexer
    Lexers for pure IPython (python + magic/shell commands)

IPythonPartialTracebackLexer, IPythonTracebackLexer
    Supports 2.x and 3.x via keyword `python3`. The partial traceback
    lexer reads everything but the Python code appearing in a traceback.
    The full lexer combines the partial lexer with an IPython lexer.

IPythonConsoleLexer
    A lexer for IPython console sessions, with support for tracebacks.

IPyLexer
    A friendly lexer which examines the first line of text and from it
    decides whether to use an IPython lexer or an IPython console lexer.
    This is probably the only lexer that needs to be explicitly added
    to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import (
    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']


def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython-specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+       (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
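# Usage sketch (hypothetical, for illustration): the generated classes are
# ordinary Pygments lexers, e.g.
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     highlight("!ls -l\n", IPython3Lexer(), HtmlFormatter())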


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in = 'In [#]: '
    #     continuation = '   .D.: '
    #     template = 'Out[#]: '
    #
    # where '#' is the 'prompt number' or 'execution count' and 'D'
    # is a number of dots matching the width of the execution count.
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression used to detect the start of inputs.
            (It is compiled internally, so pass a pattern string rather than
            a compiled regex.) Although the IPython configuration setting
            may have trailing whitespace, do not include it in the regex.
            If `None`, then the default input prompt is assumed.
        in2_regex : str
            The regular expression used to detect the continuation of
            inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : str
            The regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is
        usually the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non-stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If an IPython prompt, "In \[[0-9]+\]:",
    appears anywhere in the text, then the entire text is parsed with an
    IPython console lexer. If not, then the entire text is parsed with an
    IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
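        # For example (illustration only): text containing "In [1]: a = 1"
        # selects the console lexer above, while a bare "%%timeit\npass"
        # cell falls through to the plain IPython lexer.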
        for token in lex.get_tokens_unprocessed(text):
            yield token

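As the module docstring notes, IPyLexer is typically the one lexer that has to be registered with Pygments explicitly (for example via Sphinx's app.add_lexer). A minimal usage sketch, assuming the module is importable as `lexers` (hypothetical path):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    from lexers import IPyLexer  # hypothetical import path

    session = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
    # The "In [#]:" prompt makes IPyLexer delegate to IPythonConsoleLexer;
    # without it, the text would be lexed as IPython source instead.
    print(highlight(session, IPyLexer(), HtmlFormatter()))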