@@ -0,0 +1,512 @@
+# -*- coding: utf-8 -*-
+"""
+Defines a variety of Pygments lexers for highlighting IPython code.
+
+This includes:
+
+    IPythonLexer, IPython3Lexer
+        Lexers for pure IPython (python + magic/shell commands)
+
+    IPythonPartialTracebackLexer, IPythonTracebackLexer
+        Supports 2.x and 3.x via keyword `python3`. The partial traceback
+        lexer reads everything but the Python code appearing in a traceback.
+        The full lexer combines the partial lexer with an IPython lexer.
+
+    IPythonConsoleLexer
+        A lexer for IPython console sessions, with support for tracebacks.
+
+    IPyLexer
+        A friendly lexer which examines the first line of text and from it,
+        decides whether to use an IPython lexer or an IPython console lexer.
+        This is probably the only lexer that needs to be explicitly added
+        to Pygments.
+
+"""
+#-----------------------------------------------------------------------------
+# Copyright (c) 2013, the IPython Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+
+# Standard library
+import re
+
+# Third party
+from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
+from pygments.lexer import (
+    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
+)
+from pygments.token import (
+    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
+)
+from pygments.util import get_bool_opt
+
+# Local
+from IPython.testing.skipdoctest import skip_doctest
+
+line_re = re.compile('.*?\n')
+
+__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
+           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
+           'IPythonConsoleLexer', 'IPyLexer']
+
+ipython_tokens = [
+    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
+    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
+    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
+    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
+    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
+                                            using(BashLexer), Text)),
+    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
+    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
+]
+
+def build_ipy_lexer(python3):
+    """Builds IPython lexers depending on the value of `python3`.
+
+    The lexer inherits from an appropriate Python lexer and then adds
+    information about IPython specific keywords (i.e. magic commands,
+    shell commands, etc.)
+
+    Parameters
+    ----------
+    python3 : bool
+        If `True`, then build an IPython lexer from a Python 3 lexer.
+
+    """
+    # It would be nice to have a single IPython lexer class which takes
+    # a boolean `python3`. But since there are two Python lexer classes,
+    # we will also have two IPython lexer classes.
+    if python3:
+        PyLexer = Python3Lexer
+        clsname = 'IPython3Lexer'
+        name = 'IPython3'
+        aliases = ['ipython3']
+        doc = """IPython3 Lexer"""
+    else:
+        PyLexer = PythonLexer
+        clsname = 'IPythonLexer'
+        name = 'IPython'
+        aliases = ['ipython2', 'ipython']
+        doc = """IPython Lexer"""
+
+    tokens = PyLexer.tokens.copy()
+    tokens['root'] = ipython_tokens + tokens['root']
+
+    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
+             '__doc__': doc, 'tokens': tokens}
+
+    return type(name, (PyLexer,), attrs)
+
+
+IPython3Lexer = build_ipy_lexer(python3=True)
+IPythonLexer = build_ipy_lexer(python3=False)
+
+
+class IPythonPartialTracebackLexer(RegexLexer):
+    """
+    Partial lexer for IPython tracebacks.
+
+    Handles all the non-python output. This works for both Python 2.x and 3.x.
+
+    """
+    name = 'IPython Partial Traceback'
+
+    tokens = {
+        'root': [
+            # Tracebacks for syntax errors have a different style.
+            # For both types of tracebacks, we mark the first line with
+            # Generic.Traceback. For syntax errors, we mark the filename
+            # as we mark the filenames for non-syntax tracebacks.
+            #
+            # These two regexps define how IPythonConsoleLexer finds a
+            # traceback.
+            #
+            ## Non-syntax traceback
+            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
+            ## Syntax traceback
+            (r'^(  File)(.*)(, line )(\d+\n)',
+             bygroups(Generic.Traceback, Name.Namespace,
+                      Generic.Traceback, Literal.Number.Integer)),
+
+            # (Exception Identifier)(Whitespace)(Traceback Message)
+            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
+             bygroups(Name.Exception, Generic.Whitespace, Text)),
+            # (Module/Filename)(Text)(Callee)(Function Signature)
+            # Better options for callee and function signature?
+            (r'(.*)( in )(.*)(\(.*\)\n)',
+             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
+            # Regular line: (Whitespace)(Line Number)(Python Code)
+            (r'(\s*?)(\d+)(.*?\n)',
+             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
+            # Emphasized line: (Arrow)(Line Number)(Python Code)
+            # Using Exception token so arrow color matches the Exception.
+            (r'(-*>?\s?)(\d+)(.*?\n)',
+             bygroups(Name.Exception, Literal.Number.Integer, Other)),
+            # (Exception Identifier)(Message)
+            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
+             bygroups(Name.Exception, Text)),
+            # Tag everything else as Other, will be handled later.
+            (r'.*\n', Other),
+        ],
+    }
+
+
+class IPythonTracebackLexer(DelegatingLexer):
+    """
+    IPython traceback lexer.
+
+    For doctests, the tracebacks can be snipped as much as desired with the
+    exception of the lines that designate a traceback. For non-syntax error
+    tracebacks, this is the line of hyphens. For syntax error tracebacks,
+    this is the line which lists the File and line number.
+
+    """
+    # The lexer inherits from DelegatingLexer. The "root" lexer is an
+    # appropriate IPython lexer, which depends on the value of the boolean
+    # `python3`. First, we parse with the partial IPython traceback lexer.
+    # Then, any code marked with the "Other" token is delegated to the root
+    # lexer.
+    #
+    name = 'IPython Traceback'
+    aliases = ['ipythontb']
+
+    def __init__(self, **options):
+        self.python3 = get_bool_opt(options, 'python3', False)
+        if self.python3:
+            self.aliases = ['ipython3tb']
+        else:
+            self.aliases = ['ipython2tb', 'ipythontb']
+
+        if self.python3:
+            IPyLexer = IPython3Lexer
+        else:
+            IPyLexer = IPythonLexer
+
+        DelegatingLexer.__init__(self, IPyLexer,
+                                 IPythonPartialTracebackLexer, **options)
+
+@skip_doctest
+class IPythonConsoleLexer(Lexer):
+    """
+    An IPython console lexer for IPython code-blocks and doctests, such as:
+
+    .. code-block:: rst
+
+        .. code-block:: ipythonconsole
+
+            In [1]: a = 'foo'
+
+            In [2]: a
+            Out[2]: 'foo'
+
+            In [3]: print a
+            foo
+
+            In [4]: 1 / 0
+
+
+    Support is also provided for IPython exceptions:
+
+    .. code-block:: rst
+
+        .. code-block:: ipythonconsole
+
+            In [1]: raise Exception
+
+            ---------------------------------------------------------------------------
+            Exception                                 Traceback (most recent call last)
+            <ipython-input-1-fca2ab0ca76b> in <module>()
+            ----> 1 raise Exception
+
+            Exception:
+
+    """
+    name = 'IPython console session'
+    aliases = ['ipythonconsole']
+    mimetypes = ['text/x-ipython-console']
+
+    # The regexps used to determine what is input and what is output.
+    # The default prompts for IPython are:
+    #
+    #     c.PromptManager.in_template  = 'In [\#]: '
+    #     c.PromptManager.in2_template = '   .\D.: '
+    #     c.PromptManager.out_template = 'Out[\#]: '
+    #
+    in1_regex = r'In \[[0-9]+\]: '
+    in2_regex = r'   \.\.+\.: '
+    out_regex = r'Out\[[0-9]+\]: '
+
+    #: The regex to determine when a traceback starts.
+    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
+
+    def __init__(self, **options):
+        """Initialize the IPython console lexer.
+
+        Parameters
+        ----------
+        python3 : bool
+            If `True`, then the console inputs are parsed using a Python 3
+            lexer. Otherwise, they are parsed using a Python 2 lexer.
+        in1_regex : RegexObject
+            The compiled regular expression used to detect the start
+            of inputs. Although the IPython configuration setting may have a
+            trailing whitespace, do not include it in the regex. If `None`,
+            then the default input prompt is assumed.
+        in2_regex : RegexObject
+            The compiled regular expression used to detect the continuation
+            of inputs. Although the IPython configuration setting may have a
+            trailing whitespace, do not include it in the regex. If `None`,
+            then the default input prompt is assumed.
+        out_regex : RegexObject
+            The compiled regular expression used to detect outputs. If `None`,
+            then the default output prompt is assumed.
+
+        """
+        self.python3 = get_bool_opt(options, 'python3', False)
+        if self.python3:
+            self.aliases = ['ipython3console']
+        else:
+            self.aliases = ['ipython2console', 'ipythonconsole']
+
+        in1_regex = options.get('in1_regex', self.in1_regex)
+        in2_regex = options.get('in2_regex', self.in2_regex)
+        out_regex = options.get('out_regex', self.out_regex)
+
+        # So that we can work with input and output prompts which have been
+        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
+        # we do not do this, then such prompts will be tagged as 'output'.
+        # The reason we can't just use the rstrip'd variants instead is that
+        # we want any whitespace associated with the prompt to be inserted
+        # with the token. This allows formatted code to be modified so as to
+        # hide the appearance of prompts, with the whitespace included. One
+        # example use of this is in copybutton.js from the standard lib
+        # Python docs.
+        in1_regex_rstrip = in1_regex.rstrip() + '\n'
+        in2_regex_rstrip = in2_regex.rstrip() + '\n'
+        out_regex_rstrip = out_regex.rstrip() + '\n'
+
+        # Compile and save them all.
+        attrs = ['in1_regex', 'in2_regex', 'out_regex',
+                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
+        for attr in attrs:
+            self.__setattr__(attr, re.compile(locals()[attr]))
+
+        Lexer.__init__(self, **options)
+
+        if self.python3:
+            pylexer = IPython3Lexer
+            tblexer = IPythonTracebackLexer
+        else:
+            pylexer = IPythonLexer
+            tblexer = IPythonTracebackLexer
+
+        self.pylexer = pylexer(**options)
+        self.tblexer = tblexer(**options)
+
+        self.reset()
+
+    def reset(self):
+        self.mode = 'output'
+        self.index = 0
+        self.buffer = u''
+        self.insertions = []
+
+    def buffered_tokens(self):
+        """
+        Generator of unprocessed tokens after doing insertions and before
+        changing to a new state.
+
+        """
+        if self.mode == 'output':
+            tokens = [(0, Generic.Output, self.buffer)]
+        elif self.mode == 'input':
+            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
+        else: # traceback
+            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
+
+        for i, t, v in do_insertions(self.insertions, tokens):
+            # All token indexes are relative to the buffer.
+            yield self.index + i, t, v
+
+        # Clear it all
+        self.index += len(self.buffer)
+        self.buffer = u''
+        self.insertions = []
+
+    def get_mci(self, line):
+        """
+        Parses the line and returns a 3-tuple: (mode, code, insertion).
+
+        `mode` is the next mode (or state) of the lexer, and is always equal
+        to 'input', 'output', or 'tb'.
+
+        `code` is a portion of the line that should be added to the buffer
+        corresponding to the next mode and eventually lexed by another lexer.
+        For example, `code` could be Python code if `mode` were 'input'.
+
+        `insertion` is a 3-tuple (index, token, text) representing an
+        unprocessed "token" that will be inserted into the stream of tokens
+        that are created from the buffer once we change modes. This is usually
+        the input or output prompt.
+
+        In general, the next mode depends on the current mode and on the
+        contents of `line`.
+
+        """
+        # To reduce the number of regex match checks, we have multiple
+        # 'if' blocks instead of 'if-elif' blocks.
+
+        # Check for possible end of input
+        in2_match = self.in2_regex.match(line)
+        in2_match_rstrip = self.in2_regex_rstrip.match(line)
+        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
+           in2_match_rstrip:
+            end_input = True
+        else:
+            end_input = False
+        if end_input and self.mode != 'tb':
+            # Only look for an end of input when not in tb mode.
+            # An ellipsis could appear within the traceback.
+            mode = 'output'
+            code = u''
+            insertion = (0, Generic.Prompt, line)
+            return mode, code, insertion
+
+        # Check for output prompt
+        out_match = self.out_regex.match(line)
+        out_match_rstrip = self.out_regex_rstrip.match(line)
+        if out_match or out_match_rstrip:
+            mode = 'output'
+            if out_match:
+                idx = out_match.end()
+            else:
+                idx = out_match_rstrip.end()
+            code = line[idx:]
+            # Use the 'heading' token for output. We cannot use Generic.Error
+            # since it would conflict with exceptions.
+            insertion = (0, Generic.Heading, line[:idx])
+            return mode, code, insertion
+
+
+        # Check for input or continuation prompt (non-stripped version)
+        in1_match = self.in1_regex.match(line)
+        if in1_match or (in2_match and self.mode != 'tb'):
+            # New input or, when not in tb, continued input.
+            # We do not check for continued input when in tb since it is
+            # allowable to replace a long stack with an ellipsis.
+            mode = 'input'
+            if in1_match:
+                idx = in1_match.end()
+            else: # in2_match
+                idx = in2_match.end()
+            code = line[idx:]
+            insertion = (0, Generic.Prompt, line[:idx])
+            return mode, code, insertion
+
+        # Check for input or continuation prompt (stripped version)
+        in1_match_rstrip = self.in1_regex_rstrip.match(line)
+        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
+            # New input or, when not in tb, continued input.
+            # We do not check for continued input when in tb since it is
+            # allowable to replace a long stack with an ellipsis.
+            mode = 'input'
+            if in1_match_rstrip:
+                idx = in1_match_rstrip.end()
+            else: # in2_match
+                idx = in2_match_rstrip.end()
+            code = line[idx:]
+            insertion = (0, Generic.Prompt, line[:idx])
+            return mode, code, insertion
+
+        # Check for traceback
+        if self.ipytb_start.match(line):
+            mode = 'tb'
+            code = line
+            insertion = None
+            return mode, code, insertion
+
+        # All other stuff...
+        if self.mode in ('input', 'output'):
+            # We assume all other text is output. Multiline input that
+            # does not use the continuation marker cannot be detected.
+            # For example, the 3 in the following is clearly output:
+            #
+            #    In [1]: print 3
+            #    3
+            #
+            # But the following second line is part of the input:
+            #
+            #    In [2]: while True:
+            #        print True
+            #
+            # In both cases, the 2nd line will be 'output'.
+            #
+            mode = 'output'
+        else:
+            mode = 'tb'
+
+        code = line
+        insertion = None
+
+        return mode, code, insertion
+
+    def get_tokens_unprocessed(self, text):
+        self.reset()
+        for match in line_re.finditer(text):
+            line = match.group()
+            mode, code, insertion = self.get_mci(line)
+
+            if mode != self.mode:
+                # Yield buffered tokens before transitioning to new mode.
+                for token in self.buffered_tokens():
+                    yield token
+                self.mode = mode
+
+            if insertion:
+                self.insertions.append((len(self.buffer), [insertion]))
+            self.buffer += code
+        else:
+            for token in self.buffered_tokens():
+                yield token
+
+class IPyLexer(Lexer):
+    """
+    Primary lexer for all IPython-like code.
+
+    This is a simple helper lexer. If the first line of the text begins with
+    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
+    lexer. If not, then the entire text is parsed with an IPython lexer.
+
+    The goal is to reduce the number of lexers that are registered
+    with Pygments.
+
+    """
+    name = 'IPy session'
+    aliases = ['ipy']
+
+    def __init__(self, **options):
+        self.python3 = get_bool_opt(options, 'python3', False)
+        if self.python3:
+            self.aliases = ['ipy3']
+        else:
+            self.aliases = ['ipy2', 'ipy']
+
+        Lexer.__init__(self, **options)
+
+        self.IPythonLexer = IPythonLexer(**options)
+        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
+
+    def get_tokens_unprocessed(self, text):
+        # Search for the input prompt anywhere...this allows code blocks to
+        # begin with comments as well.
+        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
+            lex = self.IPythonConsoleLexer
+        else:
+            lex = self.IPythonLexer
+        for token in lex.get_tokens_unprocessed(text):
+            yield token
+
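The module docstring above notes that `IPyLexer` is probably the only lexer
that needs to be registered with Pygments by hand. As a quick orientation, a
minimal usage sketch (assuming Pygments is installed and this new file is
importable as `IPython.lib.lexers`, the path used elsewhere in this diff):

    # Highlight an IPython console session as HTML.
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from IPython.lib.lexers import IPyLexer

    session = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"

    # IPyLexer spots the "In [1]:" prompt and hands the whole text to the
    # console lexer; text without a prompt goes to the plain IPython lexer.
    print(highlight(session, IPyLexer(), HtmlFormatter()))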
@@ -109,7 +109,7 @@ def _pygments_highlight(source, output_formatter, language='ipython', metadata=None
     from pygments import highlight
     from pygments.lexers import get_lexer_by_name
     from pygments.util import ClassNotFound
-    from IPython.nbconvert.utils.lexers import IPythonLexer, IPython3Lexer
+    from IPython.lib.lexers import IPythonLexer, IPython3Lexer

     # If the cell uses a magic extension language,
     # use the magic language instead.
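The imports above pair `get_lexer_by_name` with `ClassNotFound`, which
supports a lookup-with-fallback pattern alongside the bundled lexers. A small
sketch of that pattern; the helper name `resolve_lexer` is hypothetical and
not part of this diff:

    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound
    from IPython.lib.lexers import IPythonLexer, IPython3Lexer

    def resolve_lexer(language):
        # Use the bundled lexers directly; anything else goes through
        # Pygments' registry, which raises ClassNotFound for unknown names.
        if language == 'ipython':
            return IPythonLexer()
        if language == 'ipython3':
            return IPython3Lexer()
        try:
            return get_lexer_by_name(language, stripall=True)
        except ClassNotFound:
            return None  # caller can then skip highlighting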
This diff has been collapsed as it changes many lines (509 lines changed): it
deletes the old copy of the lexer module, whose body duplicates the new file
shown above (minus the `__all__` list), and leaves a three-line
backwards-compatibility shim in its place.

@@ -1,508 +1,3 @@
-# -*- coding: utf-8 -*-
-(... 507 further deleted lines, reproducing the module shown above ...)
+"""Deprecated; import from IPython.lib.lexers instead."""
+
+from IPython.lib.lexers import *
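Because the new module defines `__all__`, the shim's star import re-exports
the full public API, so code using the old path keeps working. A small sketch
(the old module path is the one named by the autogen exclusion added below):

    # Deprecated path and new path resolve to the same class objects.
    from IPython.nbconvert.utils.lexers import IPyLexer as OldIPyLexer
    from IPython.lib.lexers import IPyLexer

    assert OldIPyLexer is IPyLexer  # the shim simply re-exports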
@@ -4,7 +4,7 @@ reST directive for syntax-highlighting ipython interactive sessions.
 """

 from sphinx import highlighting
-from ..nbconvert.utils.lexers import IPyLexer
+from IPython.lib.lexers import IPyLexer

 def setup(app):
     """Setup as a sphinx extension."""
@@ -39,6 +39,8 @@ if __name__ == '__main__':
     r'\.frontend$',
     # Deprecated:
     r'\.core\.magics\.deprecated',
+    # Backwards compat import for lib.lexers
+    r'\.nbconvert\.utils\.lexers',
     # We document this manually.
     r'\.utils\.py3compat',
     # These are exposed by nbformat
@@ -7,7 +7,7 @@ New IPython Console Lexer

 The IPython console lexer has been rewritten and now supports tracebacks
 and customized input/output prompts. An entire suite of lexers is now
-available at :mod:`IPython.nbconvert.utils.lexers`. These include:
+available at :mod:`IPython.lib.lexers`. These include:

 IPythonLexer & IPython3Lexer
     Lexers for pure IPython (python + magic/shell commands)
@@ -292,9 +292,9 @@ if 'setuptools' in sys.modules:
     setuptools_extra_args['entry_points'] = {
         'console_scripts': find_entry_points(),
         'pygments.lexers': [
-            'ipythonconsole = IPython.nbconvert.utils.lexers:IPythonConsoleLexer',
-            'ipython = IPython.nbconvert.utils.lexers:IPythonLexer',
-            'ipython3 = IPython.nbconvert.utils.lexers:IPython3Lexer',
+            'ipythonconsole = IPython.lib.lexers:IPythonConsoleLexer',
+            'ipython = IPython.lib.lexers:IPythonLexer',
+            'ipython3 = IPython.lib.lexers:IPython3Lexer',
         ],
     }
     setup_args['extras_require'] = extras_require
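Once a package carrying these `pygments.lexers` entry points is installed,
Pygments discovers the lexers through its plugin mechanism, so downstream
code needs no manual registration:

    # The aliases declared above resolve through Pygments' plugin registry.
    from pygments.lexers import get_lexer_by_name

    lexer = get_lexer_by_name('ipythonconsole')
    print(type(lexer).__name__)  # IPythonConsoleLexer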