upstream/ipython Commit - r20625:857d102d

Move IPython lexers module to lib...

Thomas Kluyver -

r20625:857d102d

parent child

IPython/lib/lexers.py

0 created 644 +512 0

This diff has been collapsed as it changes many lines, (512 lines changed) Show them Hide them
		@@ -0,0 +1,512 b''
	1	# -- coding: utf-8 --
	2	"""
	3	Defines a variety of Pygments lexers for highlighting IPython code.
	4
	5	This includes:
	6
	7	IPythonLexer, IPython3Lexer
	8	Lexers for pure IPython (python + magic/shell commands)
	9
	10	IPythonPartialTracebackLexer, IPythonTracebackLexer
	11	Supports 2.x and 3.x via keyword `python3`. The partial traceback
	12	lexer reads everything but the Python code appearing in a traceback.
	13	The full lexer combines the partial lexer with an IPython lexer.
	14
	15	IPythonConsoleLexer
	16	A lexer for IPython console sessions, with support for tracebacks.
	17
	18	IPyLexer
	19	A friendly lexer which examines the first line of text and from it,
	20	decides whether to use an IPython lexer or an IPython console lexer.
	21	This is probably the only lexer that needs to be explicitly added
	22	to Pygments.
	23
	24	"""
	25	#-----------------------------------------------------------------------------
	26	# Copyright (c) 2013, the IPython Development Team.
	27	#
	28	# Distributed under the terms of the Modified BSD License.
	29	#
	30	# The full license is in the file COPYING.txt, distributed with this software.
	31	#-----------------------------------------------------------------------------
	32
	33	# Standard library
	34	import re
	35
	36	# Third party
	37	from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
	38	from pygments.lexer import (
	39	Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
	40	)
	41	from pygments.token import (
	42	Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
	43	)
	44	from pygments.util import get_bool_opt
	45
	46	# Local
	47	from IPython.testing.skipdoctest import skip_doctest
	48
	49	line_re = re.compile('.*?\n')
	50
	51	__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
	52	'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
	53	'IPythonConsoleLexer', 'IPyLexer']
	54
	55	ipython_tokens = [
	56	(r"(?s)(\s)(%%)(\w+)(.)", bygroups(Text, Operator, Keyword, Text)),
	57	(r'(?s)(^\s)(%%!)([^\n]\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
	58	(r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
	59	(r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
	60	(r'(%)(sx\|sc\|system)(.*)(\n)', bygroups(Operator, Keyword,
	61	using(BashLexer), Text)),
	62	(r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
	63	(r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
	64	(r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
	65	(r'^(\s)(\?\??)(\s%{0,2}[\w\.\])', bygroups(Text, Operator, Text)),
	66	]
	67
	68	def build_ipy_lexer(python3):
	69	"""Builds IPython lexers depending on the value of `python3`.
	70
	71	The lexer inherits from an appropriate Python lexer and then adds
	72	information about IPython specific keywords (i.e. magic commands,
	73	shell commands, etc.)
	74
	75	Parameters
	76	----------
	77	python3 : bool
	78	If `True`, then build an IPython lexer from a Python 3 lexer.
	79
	80	"""
	81	# It would be nice to have a single IPython lexer class which takes
	82	# a boolean `python3`. But since there are two Python lexer classes,
	83	# we will also have two IPython lexer classes.
	84	if python3:
	85	PyLexer = Python3Lexer
	86	clsname = 'IPython3Lexer'
	87	name = 'IPython3'
	88	aliases = ['ipython3']
	89	doc = """IPython3 Lexer"""
	90	else:
	91	PyLexer = PythonLexer
	92	clsname = 'IPythonLexer'
	93	name = 'IPython'
	94	aliases = ['ipython2', 'ipython']
	95	doc = """IPython Lexer"""
	96
	97	tokens = PyLexer.tokens.copy()
	98	tokens['root'] = ipython_tokens + tokens['root']
	99
	100	attrs = {'name': name, 'aliases': aliases, 'filenames': [],
	101	'__doc__': doc, 'tokens': tokens}
	102
	103	return type(name, (PyLexer,), attrs)
	104
	105
	106	IPython3Lexer = build_ipy_lexer(python3=True)
	107	IPythonLexer = build_ipy_lexer(python3=False)
	108
	109
	110	class IPythonPartialTracebackLexer(RegexLexer):
	111	"""
	112	Partial lexer for IPython tracebacks.
	113
	114	Handles all the non-python output. This works for both Python 2.x and 3.x.
	115
	116	"""
	117	name = 'IPython Partial Traceback'
	118
	119	tokens = {
	120	'root': [
	121	# Tracebacks for syntax errors have a different style.
	122	# For both types of tracebacks, we mark the first line with
	123	# Generic.Traceback. For syntax errors, we mark the filename
	124	# as we mark the filenames for non-syntax tracebacks.
	125	#
	126	# These two regexps define how IPythonConsoleLexer finds a
	127	# traceback.
	128	#
	129	## Non-syntax traceback
	130	(r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
	131	## Syntax traceback
	132	(r'^( File)(.*)(, line )(\d+\n)',
	133	bygroups(Generic.Traceback, Name.Namespace,
	134	Generic.Traceback, Literal.Number.Integer)),
	135
	136	# (Exception Identifier)(Whitespace)(Traceback Message)
	137	(r'(?u)(^[^\d\W]\w)(\s)(Traceback.*?\n)',
	138	bygroups(Name.Exception, Generic.Whitespace, Text)),
	139	# (Module/Filename)(Text)(Callee)(Function Signature)
	140	# Better options for callee and function signature?
	141	(r'(.)( in )(.)($.*$\n)',
	142	bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
	143	# Regular line: (Whitespace)(Line Number)(Python Code)
	144	(r'(\s?)(\d+)(.?\n)',
	145	bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
	146	# Emphasized line: (Arrow)(Line Number)(Python Code)
	147	# Using Exception token so arrow color matches the Exception.
	148	(r'(->?\s?)(\d+)(.?\n)',
	149	bygroups(Name.Exception, Literal.Number.Integer, Other)),
	150	# (Exception Identifier)(Message)
	151	(r'(?u)(^[^\d\W]\w)(:.?\n)',
	152	bygroups(Name.Exception, Text)),
	153	# Tag everything else as Other, will be handled later.
	154	(r'.*\n', Other),
	155	],
	156	}
	157
	158
	159	class IPythonTracebackLexer(DelegatingLexer):
	160	"""
	161	IPython traceback lexer.
	162
	163	For doctests, the tracebacks can be snipped as much as desired with the
	164	exception to the lines that designate a traceback. For non-syntax error
	165	tracebacks, this is the line of hyphens. For syntax error tracebacks,
	166	this is the line which lists the File and line number.
	167
	168	"""
	169	# The lexer inherits from DelegatingLexer. The "root" lexer is an
	170	# appropriate IPython lexer, which depends on the value of the boolean
	171	# `python3`. First, we parse with the partial IPython traceback lexer.
	172	# Then, any code marked with the "Other" token is delegated to the root
	173	# lexer.
	174	#
	175	name = 'IPython Traceback'
	176	aliases = ['ipythontb']
	177
	178	def __init__(self, **options):
	179	self.python3 = get_bool_opt(options, 'python3', False)
	180	if self.python3:
	181	self.aliases = ['ipython3tb']
	182	else:
	183	self.aliases = ['ipython2tb', 'ipythontb']
	184
	185	if self.python3:
	186	IPyLexer = IPython3Lexer
	187	else:
	188	IPyLexer = IPythonLexer
	189
	190	DelegatingLexer.__init__(self, IPyLexer,
	191	IPythonPartialTracebackLexer, **options)
	192
	193	@skip_doctest
	194	class IPythonConsoleLexer(Lexer):
	195	"""
	196	An IPython console lexer for IPython code-blocks and doctests, such as:
	197
	198	.. code-block:: rst
	199
	200	.. code-block:: ipythonconsole
	201
	202	In [1]: a = 'foo'
	203
	204	In [2]: a
	205	Out[2]: 'foo'
	206
	207	In [3]: print a
	208	foo
	209
	210	In [4]: 1 / 0
	211
	212
	213	Support is also provided for IPython exceptions:
	214
	215	.. code-block:: rst
	216
	217	.. code-block:: ipythonconsole
	218
	219	In [1]: raise Exception
	220
	221	---------------------------------------------------------------------------
	222	Exception Traceback (most recent call last)
	223	<ipython-input-1-fca2ab0ca76b> in <module>()
	224	----> 1 raise Exception
	225
	226	Exception:
	227
	228	"""
	229	name = 'IPython console session'
	230	aliases = ['ipythonconsole']
	231	mimetypes = ['text/x-ipython-console']
	232
	233	# The regexps used to determine what is input and what is output.
	234	# The default prompts for IPython are:
	235	#
	236	# c.PromptManager.in_template = 'In [\#]: '
	237	# c.PromptManager.in2_template = ' .\D.: '
	238	# c.PromptManager.out_template = 'Out[\#]: '
	239	#
	240	in1_regex = r'In \[[0-9]+\]: '
	241	in2_regex = r' \.\.+\.: '
	242	out_regex = r'Out\[[0-9]+\]: '
	243
	244	#: The regex to determine when a traceback starts.
	245	ipytb_start = re.compile(r'^(\^C)?(-+\n)\|^( File)(.*)(, line )(\d+\n)')
	246
	247	def __init__(self, **options):
	248	"""Initialize the IPython console lexer.
	249
	250	Parameters
	251	----------
	252	python3 : bool
	253	If `True`, then the console inputs are parsed using a Python 3
	254	lexer. Otherwise, they are parsed using a Python 2 lexer.
	255	in1_regex : RegexObject
	256	The compiled regular expression used to detect the start
	257	of inputs. Although the IPython configuration setting may have a
	258	trailing whitespace, do not include it in the regex. If `None`,
	259	then the default input prompt is assumed.
	260	in2_regex : RegexObject
	261	The compiled regular expression used to detect the continuation
	262	of inputs. Although the IPython configuration setting may have a
	263	trailing whitespace, do not include it in the regex. If `None`,
	264	then the default input prompt is assumed.
	265	out_regex : RegexObject
	266	The compiled regular expression used to detect outputs. If `None`,
	267	then the default output prompt is assumed.
	268
	269	"""
	270	self.python3 = get_bool_opt(options, 'python3', False)
	271	if self.python3:
	272	self.aliases = ['ipython3console']
	273	else:
	274	self.aliases = ['ipython2console', 'ipythonconsole']
	275
	276	in1_regex = options.get('in1_regex', self.in1_regex)
	277	in2_regex = options.get('in2_regex', self.in2_regex)
	278	out_regex = options.get('out_regex', self.out_regex)
	279
	280	# So that we can work with input and output prompts which have been
	281	# rstrip'd (possibly by editors) we also need rstrip'd variants. If
	282	# we do not do this, then such prompts will be tagged as 'output'.
	283	# The reason can't just use the rstrip'd variants instead is because
	284	# we want any whitespace associated with the prompt to be inserted
	285	# with the token. This allows formatted code to be modified so as hide
	286	# the appearance of prompts, with the whitespace included. One example
	287	# use of this is in copybutton.js from the standard lib Python docs.
	288	in1_regex_rstrip = in1_regex.rstrip() + '\n'
	289	in2_regex_rstrip = in2_regex.rstrip() + '\n'
	290	out_regex_rstrip = out_regex.rstrip() + '\n'
	291
	292	# Compile and save them all.
	293	attrs = ['in1_regex', 'in2_regex', 'out_regex',
	294	'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
	295	for attr in attrs:
	296	self.__setattr__(attr, re.compile(locals()[attr]))
	297
	298	Lexer.__init__(self, **options)
	299
	300	if self.python3:
	301	pylexer = IPython3Lexer
	302	tblexer = IPythonTracebackLexer
	303	else:
	304	pylexer = IPythonLexer
	305	tblexer = IPythonTracebackLexer
	306
	307	self.pylexer = pylexer(**options)
	308	self.tblexer = tblexer(**options)
	309
	310	self.reset()
	311
	312	def reset(self):
	313	self.mode = 'output'
	314	self.index = 0
	315	self.buffer = u''
	316	self.insertions = []
	317
	318	def buffered_tokens(self):
	319	"""
	320	Generator of unprocessed tokens after doing insertions and before
	321	changing to a new state.
	322
	323	"""
	324	if self.mode == 'output':
	325	tokens = [(0, Generic.Output, self.buffer)]
	326	elif self.mode == 'input':
	327	tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
	328	else: # traceback
	329	tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
	330
	331	for i, t, v in do_insertions(self.insertions, tokens):
	332	# All token indexes are relative to the buffer.
	333	yield self.index + i, t, v
	334
	335	# Clear it all
	336	self.index += len(self.buffer)
	337	self.buffer = u''
	338	self.insertions = []
	339
	340	def get_mci(self, line):
	341	"""
	342	Parses the line and returns a 3-tuple: (mode, code, insertion).
	343
	344	`mode` is the next mode (or state) of the lexer, and is always equal
	345	to 'input', 'output', or 'tb'.
	346
	347	`code` is a portion of the line that should be added to the buffer
	348	corresponding to the next mode and eventually lexed by another lexer.
	349	For example, `code` could be Python code if `mode` were 'input'.
	350
	351	`insertion` is a 3-tuple (index, token, text) representing an
	352	unprocessed "token" that will be inserted into the stream of tokens
	353	that are created from the buffer once we change modes. This is usually
	354	the input or output prompt.
	355
	356	In general, the next mode depends on current mode and on the contents
	357	of `line`.
	358
	359	"""
	360	# To reduce the number of regex match checks, we have multiple
	361	# 'if' blocks instead of 'if-elif' blocks.
	362
	363	# Check for possible end of input
	364	in2_match = self.in2_regex.match(line)
	365	in2_match_rstrip = self.in2_regex_rstrip.match(line)
	366	if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
	367	in2_match_rstrip:
	368	end_input = True
	369	else:
	370	end_input = False
	371	if end_input and self.mode != 'tb':
	372	# Only look for an end of input when not in tb mode.
	373	# An ellipsis could appear within the traceback.
	374	mode = 'output'
	375	code = u''
	376	insertion = (0, Generic.Prompt, line)
	377	return mode, code, insertion
	378
	379	# Check for output prompt
	380	out_match = self.out_regex.match(line)
	381	out_match_rstrip = self.out_regex_rstrip.match(line)
	382	if out_match or out_match_rstrip:
	383	mode = 'output'
	384	if out_match:
	385	idx = out_match.end()
	386	else:
	387	idx = out_match_rstrip.end()
	388	code = line[idx:]
	389	# Use the 'heading' token for output. We cannot use Generic.Error
	390	# since it would conflict with exceptions.
	391	insertion = (0, Generic.Heading, line[:idx])
	392	return mode, code, insertion
	393
	394
	395	# Check for input or continuation prompt (non stripped version)
	396	in1_match = self.in1_regex.match(line)
	397	if in1_match or (in2_match and self.mode != 'tb'):
	398	# New input or when not in tb, continued input.
	399	# We do not check for continued input when in tb since it is
	400	# allowable to replace a long stack with an ellipsis.
	401	mode = 'input'
	402	if in1_match:
	403	idx = in1_match.end()
	404	else: # in2_match
	405	idx = in2_match.end()
	406	code = line[idx:]
	407	insertion = (0, Generic.Prompt, line[:idx])
	408	return mode, code, insertion
	409
	410	# Check for input or continuation prompt (stripped version)
	411	in1_match_rstrip = self.in1_regex_rstrip.match(line)
	412	if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
	413	# New input or when not in tb, continued input.
	414	# We do not check for continued input when in tb since it is
	415	# allowable to replace a long stack with an ellipsis.
	416	mode = 'input'
	417	if in1_match_rstrip:
	418	idx = in1_match_rstrip.end()
	419	else: # in2_match
	420	idx = in2_match_rstrip.end()
	421	code = line[idx:]
	422	insertion = (0, Generic.Prompt, line[:idx])
	423	return mode, code, insertion
	424
	425	# Check for traceback
	426	if self.ipytb_start.match(line):
	427	mode = 'tb'
	428	code = line
	429	insertion = None
	430	return mode, code, insertion
	431
	432	# All other stuff...
	433	if self.mode in ('input', 'output'):
	434	# We assume all other text is output. Multiline input that
	435	# does not use the continuation marker cannot be detected.
	436	# For example, the 3 in the following is clearly output:
	437	#
	438	# In [1]: print 3
	439	# 3
	440	#
	441	# But the following second line is part of the input:
	442	#
	443	# In [2]: while True:
	444	# print True
	445	#
	446	# In both cases, the 2nd line will be 'output'.
	447	#
	448	mode = 'output'
	449	else:
	450	mode = 'tb'
	451
	452	code = line
	453	insertion = None
	454
	455	return mode, code, insertion
	456
	457	def get_tokens_unprocessed(self, text):
	458	self.reset()
	459	for match in line_re.finditer(text):
	460	line = match.group()
	461	mode, code, insertion = self.get_mci(line)
	462
	463	if mode != self.mode:
	464	# Yield buffered tokens before transitioning to new mode.
	465	for token in self.buffered_tokens():
	466	yield token
	467	self.mode = mode
	468
	469	if insertion:
	470	self.insertions.append((len(self.buffer), [insertion]))
	471	self.buffer += code
	472	else:
	473	for token in self.buffered_tokens():
	474	yield token
	475
	476	class IPyLexer(Lexer):
	477	"""
	478	Primary lexer for all IPython-like code.
	479
	480	This is a simple helper lexer. If the first line of the text begins with
	481	"In \[[0-9]+\]:", then the entire text is parsed with an IPython console
	482	lexer. If not, then the entire text is parsed with an IPython lexer.
	483
	484	The goal is to reduce the number of lexers that are registered
	485	with Pygments.
	486
	487	"""
	488	name = 'IPy session'
	489	aliases = ['ipy']
	490
	491	def __init__(self, **options):
	492	self.python3 = get_bool_opt(options, 'python3', False)
	493	if self.python3:
	494	self.aliases = ['ipy3']
	495	else:
	496	self.aliases = ['ipy2', 'ipy']
	497
	498	Lexer.__init__(self, **options)
	499
	500	self.IPythonLexer = IPythonLexer(**options)
	501	self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
	502
	503	def get_tokens_unprocessed(self, text):
	504	# Search for the input prompt anywhere...this allows code blocks to
	505	# begin with comments as well.
	506	if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
	507	lex = self.IPythonConsoleLexer
	508	else:
	509	lex = self.IPythonLexer
	510	for token in lex.get_tokens_unprocessed(text):
	511	yield token
	512

IPython/nbconvert/filters/highlight.py

0 +1 -1

                  from pygments import highlight
                  from pygments.lexers import get_lexer_by_name
                  from pygments.util import ClassNotFound
-                 from IPython.nbconvert.utils.lexers import IPythonLexer, IPython3Lexer
+                 from IPython.lib.lexers import IPythonLexer, IPython3Lexer
                  # If the cell uses a magic extension language,
                  # use the magic language instead.

IPython/nbconvert/utils/lexers.py

0 +2 -507

This diff has been collapsed as it changes many lines, (509 lines changed) Show them Hide them
			@@ -1,508 +1,3 b''
	1		# -- coding: utf-8 --
	2		"""
	3		Defines a variety of Pygments lexers for highlighting IPython code.
	4
	5		This includes:
	6
	7		IPythonLexer, IPython3Lexer
	8		Lexers for pure IPython (python + magic/shell commands)
	9
	10		IPythonPartialTracebackLexer, IPythonTracebackLexer
	11		Supports 2.x and 3.x via keyword `python3`. The partial traceback
	12		lexer reads everything but the Python code appearing in a traceback.
	13		The full lexer combines the partial lexer with an IPython lexer.
	14
	15		IPythonConsoleLexer
	16		A lexer for IPython console sessions, with support for tracebacks.
	17
	18		IPyLexer
	19		A friendly lexer which examines the first line of text and from it,
	20		decides whether to use an IPython lexer or an IPython console lexer.
	21		This is probably the only lexer that needs to be explicitly added
	22		to Pygments.
	23
	24		"""
	25		#-----------------------------------------------------------------------------
	26		# Copyright (c) 2013, the IPython Development Team.
	27		#
	28		# Distributed under the terms of the Modified BSD License.
	29		#
	30		# The full license is in the file COPYING.txt, distributed with this software.
	31		#-----------------------------------------------------------------------------
	32
	33		# Standard library
	34		import re
	35
	36		# Third party
	37		from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
	38		from pygments.lexer import (
	39		Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
	40		)
	41		from pygments.token import (
	42		Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
	43		)
	44		from pygments.util import get_bool_opt
	45
	46		# Local
	47		from IPython.testing.skipdoctest import skip_doctest
	48
	49		line_re = re.compile('.*?\n')
	50
	51		ipython_tokens = [
	52		(r"(?s)(\s)(%%)(\w+)(.)", bygroups(Text, Operator, Keyword, Text)),
	53		(r'(?s)(^\s)(%%!)([^\n]\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
	54		(r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
	55		(r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
	56		(r'(%)(sx\|sc\|system)(.*)(\n)', bygroups(Operator, Keyword,
	57		using(BashLexer), Text)),
	58		(r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
	59		(r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
	60		(r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
	61		(r'^(\s)(\?\??)(\s%{0,2}[\w\.\])', bygroups(Text, Operator, Text)),
	62		]
	63
	64		def build_ipy_lexer(python3):
	65		"""Builds IPython lexers depending on the value of `python3`.
	66
	67		The lexer inherits from an appropriate Python lexer and then adds
	68		information about IPython specific keywords (i.e. magic commands,
	69		shell commands, etc.)
	70
	71		Parameters
	72		----------
	73		python3 : bool
	74		If `True`, then build an IPython lexer from a Python 3 lexer.
	75
	76		"""
	77		# It would be nice to have a single IPython lexer class which takes
	78		# a boolean `python3`. But since there are two Python lexer classes,
	79		# we will also have two IPython lexer classes.
	80		if python3:
	81		PyLexer = Python3Lexer
	82		clsname = 'IPython3Lexer'
	83		name = 'IPython3'
	84		aliases = ['ipython3']
	85		doc = """IPython3 Lexer"""
	86		else:
	87		PyLexer = PythonLexer
	88		clsname = 'IPythonLexer'
	89		name = 'IPython'
	90		aliases = ['ipython2', 'ipython']
	91		doc = """IPython Lexer"""
	92
	93		tokens = PyLexer.tokens.copy()
	94		tokens['root'] = ipython_tokens + tokens['root']
	95
	96		attrs = {'name': name, 'aliases': aliases, 'filenames': [],
	97		'__doc__': doc, 'tokens': tokens}
	98
	99		return type(name, (PyLexer,), attrs)
	100
	101
	102		IPython3Lexer = build_ipy_lexer(python3=True)
	103		IPythonLexer = build_ipy_lexer(python3=False)
	104
	105
	106		class IPythonPartialTracebackLexer(RegexLexer):
	107		"""
	108		Partial lexer for IPython tracebacks.
	109
	110		Handles all the non-python output. This works for both Python 2.x and 3.x.
	111
	112		"""
	113		name = 'IPython Partial Traceback'
	114
	115		tokens = {
	116		'root': [
	117		# Tracebacks for syntax errors have a different style.
	118		# For both types of tracebacks, we mark the first line with
	119		# Generic.Traceback. For syntax errors, we mark the filename
	120		# as we mark the filenames for non-syntax tracebacks.
	121		#
	122		# These two regexps define how IPythonConsoleLexer finds a
	123		# traceback.
	124		#
	125		## Non-syntax traceback
	126		(r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
	127		## Syntax traceback
	128		(r'^( File)(.*)(, line )(\d+\n)',
	129		bygroups(Generic.Traceback, Name.Namespace,
	130		Generic.Traceback, Literal.Number.Integer)),
	131
	132		# (Exception Identifier)(Whitespace)(Traceback Message)
	133		(r'(?u)(^[^\d\W]\w)(\s)(Traceback.*?\n)',
	134		bygroups(Name.Exception, Generic.Whitespace, Text)),
	135		# (Module/Filename)(Text)(Callee)(Function Signature)
	136		# Better options for callee and function signature?
	137		(r'(.)( in )(.)($.*$\n)',
	138		bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
	139		# Regular line: (Whitespace)(Line Number)(Python Code)
	140		(r'(\s?)(\d+)(.?\n)',
	141		bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
	142		# Emphasized line: (Arrow)(Line Number)(Python Code)
	143		# Using Exception token so arrow color matches the Exception.
	144		(r'(->?\s?)(\d+)(.?\n)',
	145		bygroups(Name.Exception, Literal.Number.Integer, Other)),
	146		# (Exception Identifier)(Message)
	147		(r'(?u)(^[^\d\W]\w)(:.?\n)',
	148		bygroups(Name.Exception, Text)),
	149		# Tag everything else as Other, will be handled later.
	150		(r'.*\n', Other),
	151		],
	152		}
	153
	154
	155		class IPythonTracebackLexer(DelegatingLexer):
	156		"""
	157		IPython traceback lexer.
	158
	159		For doctests, the tracebacks can be snipped as much as desired with the
	160		exception to the lines that designate a traceback. For non-syntax error
	161		tracebacks, this is the line of hyphens. For syntax error tracebacks,
	162		this is the line which lists the File and line number.
	163
	164		"""
	165		# The lexer inherits from DelegatingLexer. The "root" lexer is an
	166		# appropriate IPython lexer, which depends on the value of the boolean
	167		# `python3`. First, we parse with the partial IPython traceback lexer.
	168		# Then, any code marked with the "Other" token is delegated to the root
	169		# lexer.
	170		#
	171		name = 'IPython Traceback'
	172		aliases = ['ipythontb']
	173
	174		def __init__(self, **options):
	175		self.python3 = get_bool_opt(options, 'python3', False)
	176		if self.python3:
	177		self.aliases = ['ipython3tb']
	178		else:
	179		self.aliases = ['ipython2tb', 'ipythontb']
	180
	181		if self.python3:
	182		IPyLexer = IPython3Lexer
	183		else:
	184		IPyLexer = IPythonLexer
	185
	186		DelegatingLexer.__init__(self, IPyLexer,
	187		IPythonPartialTracebackLexer, **options)
	188
	189		@skip_doctest
	190		class IPythonConsoleLexer(Lexer):
	191		"""
	192		An IPython console lexer for IPython code-blocks and doctests, such as:
	193
	194		.. code-block:: rst
	195
	196		.. code-block:: ipythonconsole
	197
	198		In [1]: a = 'foo'
	199
	200		In [2]: a
	201		Out[2]: 'foo'
	202
	203		In [3]: print a
	204		foo
	205
	206		In [4]: 1 / 0
	207
	208
	209		Support is also provided for IPython exceptions:
	210
	211		.. code-block:: rst
	212
	213		.. code-block:: ipythonconsole
	214
	215		In [1]: raise Exception
	216
	217		---------------------------------------------------------------------------
	218		Exception Traceback (most recent call last)
	219		<ipython-input-1-fca2ab0ca76b> in <module>()
	220		----> 1 raise Exception
	221
	222		Exception:
	223
	224		"""
	225		name = 'IPython console session'
	226		aliases = ['ipythonconsole']
	227		mimetypes = ['text/x-ipython-console']
	228
	229		# The regexps used to determine what is input and what is output.
	230		# The default prompts for IPython are:
	231		#
	232		# c.PromptManager.in_template = 'In [\#]: '
	233		# c.PromptManager.in2_template = ' .\D.: '
	234		# c.PromptManager.out_template = 'Out[\#]: '
	235		#
	236		in1_regex = r'In \[[0-9]+\]: '
	237		in2_regex = r' \.\.+\.: '
	238		out_regex = r'Out\[[0-9]+\]: '
	239
	240		#: The regex to determine when a traceback starts.
	241		ipytb_start = re.compile(r'^(\^C)?(-+\n)\|^( File)(.*)(, line )(\d+\n)')
	242
	243		def __init__(self, **options):
	244		"""Initialize the IPython console lexer.
	245
	246		Parameters
	247		----------
	248		python3 : bool
	249		If `True`, then the console inputs are parsed using a Python 3
	250		lexer. Otherwise, they are parsed using a Python 2 lexer.
	251		in1_regex : RegexObject
	252		The compiled regular expression used to detect the start
	253		of inputs. Although the IPython configuration setting may have a
	254		trailing whitespace, do not include it in the regex. If `None`,
	255		then the default input prompt is assumed.
	256		in2_regex : RegexObject
	257		The compiled regular expression used to detect the continuation
	258		of inputs. Although the IPython configuration setting may have a
	259		trailing whitespace, do not include it in the regex. If `None`,
	260		then the default input prompt is assumed.
	261		out_regex : RegexObject
	262		The compiled regular expression used to detect outputs. If `None`,
	263		then the default output prompt is assumed.
	264
	265		"""
	266		self.python3 = get_bool_opt(options, 'python3', False)
	267		if self.python3:
	268		self.aliases = ['ipython3console']
	269		else:
	270		self.aliases = ['ipython2console', 'ipythonconsole']
	271
	272		in1_regex = options.get('in1_regex', self.in1_regex)
	273		in2_regex = options.get('in2_regex', self.in2_regex)
	274		out_regex = options.get('out_regex', self.out_regex)
	275
	276		# So that we can work with input and output prompts which have been
	277		# rstrip'd (possibly by editors) we also need rstrip'd variants. If
	278		# we do not do this, then such prompts will be tagged as 'output'.
	279		# The reason can't just use the rstrip'd variants instead is because
	280		# we want any whitespace associated with the prompt to be inserted
	281		# with the token. This allows formatted code to be modified so as hide
	282		# the appearance of prompts, with the whitespace included. One example
	283		# use of this is in copybutton.js from the standard lib Python docs.
	284		in1_regex_rstrip = in1_regex.rstrip() + '\n'
	285		in2_regex_rstrip = in2_regex.rstrip() + '\n'
	286		out_regex_rstrip = out_regex.rstrip() + '\n'
	287
	288		# Compile and save them all.
	289		attrs = ['in1_regex', 'in2_regex', 'out_regex',
	290		'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
	291		for attr in attrs:
	292		self.__setattr__(attr, re.compile(locals()[attr]))
	293
	294		Lexer.__init__(self, **options)
	295
	296		if self.python3:
	297		pylexer = IPython3Lexer
	298		tblexer = IPythonTracebackLexer
	299		else:
	300		pylexer = IPythonLexer
	301		tblexer = IPythonTracebackLexer
	302
	303		self.pylexer = pylexer(**options)
	304		self.tblexer = tblexer(**options)
	305
	306		self.reset()
	307
	308		def reset(self):
	309		self.mode = 'output'
	310		self.index = 0
	311		self.buffer = u''
	312		self.insertions = []
	313
	314		def buffered_tokens(self):
	315		"""
	316		Generator of unprocessed tokens after doing insertions and before
	317		changing to a new state.
	318
	319		"""
	320		if self.mode == 'output':
	321		tokens = [(0, Generic.Output, self.buffer)]
	322		elif self.mode == 'input':
	323		tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
	324		else: # traceback
	325		tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
	326
	327		for i, t, v in do_insertions(self.insertions, tokens):
	328		# All token indexes are relative to the buffer.
	329		yield self.index + i, t, v
	330
	331		# Clear it all
	332		self.index += len(self.buffer)
	333		self.buffer = u''
	334		self.insertions = []
	335
	def get_mci(self, line):
	    """
	    Classify one line of a console session.

	    Returns a 3-tuple ``(mode, code, insertion)``:

	    `mode` is the mode (state) the lexer should be in after this line;
	    it is always one of 'input', 'output', or 'tb'.

	    `code` is the part of `line` to append to the buffer for that mode,
	    to be lexed later by another lexer (for instance, Python source
	    when `mode` is 'input').

	    `insertion` is either None or a 3-tuple (index, token, text): an
	    unprocessed "token" to be spliced into the token stream produced
	    from the buffer when the mode changes. It is usually the input or
	    output prompt.

	    The result depends on both the current mode and the contents of
	    `line`.

	    """
	    # Each check below returns as soon as it succeeds, so later regexes
	    # are only evaluated when the earlier ones did not apply.

	    # The continuation-prompt matches are reused by several checks.
	    cont = self.in2_regex.match(line)
	    cont_stripped = self.in2_regex_rstrip.match(line)

	    # A line consisting of nothing but a continuation prompt marks the
	    # end of the input. This is skipped in tb mode because an ellipsis
	    # may legitimately appear inside a traceback.
	    only_prompt = bool(cont) and cont.group().rstrip() == line.rstrip()
	    if (only_prompt or cont_stripped) and self.mode != 'tb':
	        return 'output', u'', (0, Generic.Prompt, line)

	    # Output prompt, non-stripped form preferred over the stripped one.
	    out = self.out_regex.match(line)
	    out_stripped = self.out_regex_rstrip.match(line)
	    out_prompt = out or out_stripped
	    if out_prompt:
	        cut = out_prompt.end()
	        # Generic.Heading is used for the output prompt; Generic.Error
	        # would conflict with exceptions.
	        return 'output', line[cut:], (0, Generic.Heading, line[:cut])

	    # Input or continuation prompt (non-stripped version). Continued
	    # input is not considered in tb mode, since a long stack may be
	    # replaced with an ellipsis.
	    first = self.in1_regex.match(line)
	    if first or (cont and self.mode != 'tb'):
	        cut = (first or cont).end()
	        return 'input', line[cut:], (0, Generic.Prompt, line[:cut])

	    # Input or continuation prompt (stripped version), same tb rule.
	    first_stripped = self.in1_regex_rstrip.match(line)
	    if first_stripped or (cont_stripped and self.mode != 'tb'):
	        cut = (first_stripped or cont_stripped).end()
	        return 'input', line[cut:], (0, Generic.Prompt, line[:cut])

	    # Start of a traceback: the whole line goes to the buffer.
	    if self.ipytb_start.match(line):
	        return 'tb', line, None

	    # Anything else. Outside of a traceback all remaining text is
	    # assumed to be output; multiline input written without the
	    # continuation marker cannot be detected. For example, the 3 in
	    # the following is clearly output:
	    #
	    #     In [1]: print 3
	    #     3
	    #
	    # but the second line below is really part of the input:
	    #
	    #     In [2]: while True:
	    #         print True
	    #
	    # In both cases the second line is reported as 'output'.
	    next_mode = 'output' if self.mode in ('input', 'output') else 'tb'
	    return next_mode, line, None
	452
	def get_tokens_unprocessed(self, text):
	    """
	    Lex `text` line by line, yielding unprocessed tokens.

	    The lexer state is reset first. Each match of `line_re` is
	    classified by `get_mci`; the returned code is accumulated in
	    `self.buffer` and any prompt insertion is recorded at the current
	    buffer offset. Whenever the mode changes, the tokens of the
	    buffered text are yielded before switching, and whatever is still
	    buffered after the last line is yielded at the end.

	    """
	    self.reset()
	    for line_match in line_re.finditer(text):
	        next_mode, chunk, prompt = self.get_mci(line_match.group())

	        if next_mode != self.mode:
	            # Emit what was collected under the old mode before
	            # switching to the new one.
	            for tok in self.buffered_tokens():
	                yield tok
	            self.mode = next_mode

	        if prompt:
	            # The insertion is anchored at the current end of the buffer.
	            self.insertions.append((len(self.buffer), [prompt]))
	        self.buffer += chunk

	    # The loop never breaks, so this final flush always runs.
	    for tok in self.buffered_tokens():
	        yield tok
	471
	class IPyLexer(Lexer):
	    r"""
	    Primary lexer for all IPython-like code.

	    This is a simple helper lexer. If an input prompt of the form
	    "In \[[0-9]+\]:" appears anywhere in the text, then the entire text is
	    parsed with an IPython console lexer. If not, then the entire text is
	    parsed with an IPython lexer.

	    The goal is to reduce the number of lexers that are registered
	    with Pygments.

	    """
	    name = 'IPy session'
	    aliases = ['ipy']

	    def __init__(self, **options):
	        """
	        Create the helper lexer and the two lexers it dispatches to.

	        The boolean option `python3` (default False) selects the aliases
	        of this instance; all options are forwarded unchanged to the
	        delegate lexers.
	        """
	        self.python3 = get_bool_opt(options, 'python3', False)
	        if self.python3:
	            self.aliases = ['ipy3']
	        else:
	            self.aliases = ['ipy2', 'ipy']

	        Lexer.__init__(self, **options)

	        # NOTE(review): IPythonLexer is used even when `python3` is set;
	        # confirm whether IPython3Lexer was intended in that case.
	        self.IPythonLexer = IPythonLexer(**options)
	        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

	    def get_tokens_unprocessed(self, text):
	        """
	        Yield the unprocessed tokens of `text` from the appropriate lexer.

	        The console lexer is used when an input prompt occurs anywhere in
	        `text`; otherwise the plain IPython lexer is used.
	        """
	        # Search for the input prompt anywhere...this allows code blocks to
	        # begin with comments as well. The prompt neither starts nor ends
	        # with whitespace, so stripping the text first is unnecessary.
	        if re.search(r'In \[[0-9]+\]:', text):
	            lex = self.IPythonConsoleLexer
	        else:
	            lex = self.IPythonLexer
	        for token in lex.get_tokens_unprocessed(text):
	            yield token
		1	"""Deprecated; import from IPython.lib.lexers instead."""
	508	2
		3	from IPython.lib.lexers import *

IPython/sphinxext/ipython_console_highlighting.py

0 +1 -1

              """
              from sphinx import highlighting
-             from ..nbconvert.utils.lexers import IPyLexer
+             from IPython.lib.lexers import IPyLexer
              def setup(app):
                  """Setup as a sphinx extension."""

docs/autogen_api.py

0 +2 0

                                                      r'\.frontend$',
                                                      # Deprecated:
                                                      r'\.core\.magics\.deprecated',
+                                                     # Backwards compat import for lib.lexers
+                                                     r'\.nbconvert\.utils\.lexers',
                                                      # We document this manually.
                                                      r'\.utils\.py3compat',
                                                      # These are exposed by nbformat

docs/source/development/lexer.rst

0 +1 -1

              The IPython console lexer has been rewritten and now supports tracebacks
              and customized input/output prompts. An entire suite of lexers is now
-             available at :mod:`IPython.nbconvert.utils.lexers`. These include:
+             available at :mod:`IPython.lib.lexers`. These include:
              IPythonLexer & IPython3Lexer
                Lexers for pure IPython (python + magic/shell commands)

setup.py

0 +3 -3

                  setuptools_extra_args['entry_points'] = {
                      'console_scripts': find_entry_points(),
                      'pygments.lexers': [
-                         'ipythonconsole = IPython.nbconvert.utils.lexers:IPythonConsoleLexer',
-                         'ipython = IPython.nbconvert.utils.lexers:IPythonLexer',
-                         'ipython3 = IPython.nbconvert.utils.lexers:IPython3Lexer',
+                         'ipythonconsole = IPython.lib.lexers:IPythonConsoleLexer',
+                         'ipython = IPython.lib.lexers:IPythonLexer',
+                         'ipython3 = IPython.lib.lexers:IPython3Lexer',
                      ],
                  }
                  setup_args['extras_require'] = extras_require

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages