Fix %%perl highlighting
Matthias Geier
@@ -1,532 +1,532 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and, from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import (
    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']


def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
-        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
+        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]
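
    # Illustrative example (not part of the upstream rule table): with the
    # %%perl entry fixed above, a cell such as
    #
    #     %%perl
    #     my @squares = map { $_ * $_ } (1..5);
    #     print "@squares\n";
    #
    # now has its body delegated to PerlLexer instead of falling through to
    # the generic (%%)(\w+) rule.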

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
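
# A quick sanity check of the generated classes (a hedged sketch, not part of
# the original module; uses only the standard pygments Lexer.get_tokens API):
#
#     from pygments.token import Operator
#     toks = list(IPython3Lexer().get_tokens('%%perl\nprint "hi";\n'))
#     assert (Operator, '%%perl') in toks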


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired, with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

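# For example (an illustrative sketch): lexing a snipped traceback with
# IPythonTracebackLexer first applies the partial lexer, which tags the line
# of hyphens, the arrow, and the exception names; everything it marked as
# `Other` is then re-lexed as IPython code by the root lexer:
#
#     tb = ('---------------------------------------------------------\n'
#           'ZeroDivisionError        Traceback (most recent call last)\n'
#           '----> 1 1/0\n'
#           'ZeroDivisionError: integer division or modulo by zero\n')
#     for token in IPythonTracebackLexer().get_tokens(tb):
#         print(token)
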
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in           = 'In [#]: '
    #     continuation = '   .D.: '
    #     template     = 'Out[#]: '
    #
    # Where '#' is the 'prompt number' or 'execution count' and 'D'
    # is a number of dots matching the width of the execution count.
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
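
    # For instance (illustrative), each of the following first lines matches
    # `ipytb_start` and switches the console lexer into traceback mode:
    #
    #     ---------------------------------------------------------------------------
    #     ^C---------------------------------------------------------------------------
    #       File "<ipython-input-1-...>", line 1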

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion
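
    # A worked example (illustrative): starting from the initial 'output'
    # mode, feeding these lines to get_mci() one at a time yields:
    #
    #     'In [1]: 1/0\n' -> ('input', '1/0\n', (0, Generic.Prompt, 'In [1]: '))
    #     '--------...\n' -> ('tb', line, None)   # matches ipytb_start
    #     'ZeroDivisionError: ...\n' -> ('tb', line, None)   # mode stays 'tb'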

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

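# Example usage (a hedged sketch; assumes Pygments is installed and that this
# module is importable, e.g. as IPython.lib.lexers):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     code = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
#     print(highlight(code, IPyLexer(), HtmlFormatter()))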