Backport PR #14022: Fix failing docs build.
Matthias Bussonnier
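The fix adds explicit __init__ docstrings to IPythonTracebackLexer and IPyLexer.
Without them, Sphinx inherits the docstrings from the Pygments base classes,
which cross-reference sections that exist only in the Pygments documentation,
so IPython's docs build fails. A minimal sketch of the docstring-inheritance
behaviour being worked around (hypothetical names, for illustration only;
Sphinx's autodoc performs the same MRO lookup even when the subclass defines
its own undocumented __init__):

    class Parent:
        def __init__(self):
            """See :ref:`a-label-defined-only-in-the-parent-project-docs`."""

    class Child(Parent):
        pass

    # Child does not override __init__, so it shares Parent's method object,
    # docstring included. Documentation tools therefore pick up the parent
    # text, along with cross-references that cannot resolve locally.
    print(Child.__init__.__doc__)

Giving each subclass its own short docstring breaks this inheritance.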
@@ -1,526 +1,540 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and, from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import (
    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']


def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
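# Example (illustrative sketch, not part of the diff; assumes Pygments is
# installed): the generated classes behave like any other Pygments lexer.
#
#     from pygments.token import Operator, Keyword
#     toks = list(IPythonLexer().get_tokens("%alias d ls -F\n"))
#     # '%' lexes as Operator and the magic name as Keyword, per the
#     # (r'(%)(\w+)(.*\n)', ...) rule above
#     assert toks[0] == (Operator, '%')
#     assert toks[1] == (Keyword, 'alias')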


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired, with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        """
        A subclass of `DelegatingLexer` which delegates to either the
        appropriate IPython lexer or the IPythonPartialTracebackLexer.
        """
        # Note: we need an __init__ docstring here, as otherwise the method
        # inherits the doc from the superclass, which fails the documentation
        # build because it references sections of the Pygments docs that do
        # not exist when building IPython's docs.
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print(a)
            foo


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception
            Traceback (most recent call last):
            ...
            Exception

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in           = 'In [#]: '
    #     continuation = '   .D.: '
    #     template     = 'Out[#]: '
    #
    # Where '#' is the 'prompt number' or 'execution count', and 'D' is a
    # number of dots matching the width of the execution count.
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #                print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        """
        Create a new IPyLexer instance, which dispatches to either an
        IPythonConsoleLexer (if ``In`` prompts are present) or an
        IPythonLexer (if they are not).
        """
        # The __init__ docstring is necessary for the docs build not to fail,
        # due to the parent docstring referencing a section of the Pygments
        # docs that does not exist when building IPython's docs.
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
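For context, a minimal usage sketch of the dispatch described in the IPyLexer
docstring (assumes IPython is installed, with this module importable as
IPython.lib.lexers, as in the IPython source tree):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    from IPython.lib.lexers import IPyLexer

    # Text containing "In [N]:" prompts is routed to the console lexer;
    # IPython code without prompts goes to the plain IPython lexer instead.
    session = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
    print(highlight(session, IPyLexer(), HtmlFormatter()))

    # As the module docstring notes, IPyLexer is the one lexer worth adding
    # to Pygments explicitly; in Sphinx this can be done from conf.py
    # (recent Sphinx versions accept a lexer class here):
    #
    #     def setup(app):
    #         app.add_lexer('ipy', IPyLexer)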