upstream/ipython Commit - r23331:f1d1b091

Calculate indentation based on tokens, not regexes...

Thomas Kluyver -

r23331:f1d1b091

parent child

IPython/core/inputsplitter.py

0 +113 -5

		@@ -18,8 +18,10 b' For more details, see the class docstrings below.'
18	18	# Distributed under the terms of the Modified BSD License.
19	19	import ast
20	20	import codeop
	21	import io
21	22	import re
22	23	import sys
	24	import tokenize
23	25	import warnings
24	26
25	27	from IPython.utils.py3compat import cast_unicode
		@@ -87,6 +89,112 b' def num_ini_spaces(s):'
87	89	else:
88	90	return 0
89	91
	92	# Fake token types for partial_tokenize:
	93	INCOMPLETE_STRING = tokenize.N_TOKENS
	94	IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1
	95
	96	# The 2 classes below have the same API as TokenInfo, but don't try to look up
	97	# a token type name that they won't find.
	98	class IncompleteString:
	99	type = exact_type = INCOMPLETE_STRING
	100	def __init__(self, s, start, end, line):
	101	self.s = s
	102	self.start = start
	103	self.end = end
	104	self.line = line
	105
	106	class InMultilineStatement:
	107	type = exact_type = IN_MULTILINE_STATEMENT
	108	def __init__(self, pos, line):
	109	self.s = ''
	110	self.start = self.end = pos
	111	self.line = line
	112
	113	def partial_tokens(s):
	114	"""Iterate over tokens from a possibly-incomplete string of code.
	115
	116	This adds two special token types: INCOMPLETE_STRING and
	117	IN_MULTILINE_STATEMENT. These can only occur as the last token yielded, and
	118	represent the two main ways for code to be incomplete.
	119	"""
	120	readline = io.StringIO(s).readline
	121	token = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
	122	try:
	123	for token in tokenize.generate_tokens(readline):
	124	yield token
	125	except tokenize.TokenError as e:
	126	# catch EOF error
	127	lines = s.splitlines(keepends=True)
	128	end = len(lines), len(lines[-1])
	129	if 'multi-line string' in e.args[0]:
	130	l, c = start = token.end
	131	s = lines[l-1][c:] + ''.join(lines[l:])
	132	yield IncompleteString(s, start, end, lines[-1])
	133	elif 'multi-line statement' in e.args[0]:
	134	yield InMultilineStatement(end, lines[-1])
	135	else:
	136	raise
	137
	138	def find_next_indent(code):
	139	"""Find the number of spaces for the next line of indentation"""
	140	tokens = list(partial_tokens(code))
	141	if tokens[-1].type == tokenize.ENDMARKER:
	142	tokens.pop()
	143	if not tokens:
	144	return 0
	145	if tokens[-1].type in {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}:
	146	tokens.pop()
	147
	148	if tokens[-1].type == INCOMPLETE_STRING:
	149	# Inside a multiline string
	150	return 0
	151
	152	# Find the indents used before
	153	prev_indents = [0]
	154	def _add_indent(n):
	155	if n != prev_indents[-1]:
	156	prev_indents.append(n)
	157
	158	tokiter = iter(tokens)
	159	for tok in tokiter:
	160	if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
	161	_add_indent(tok.end[1])
	162	elif (tok.type == tokenize.NL):
	163	try:
	164	_add_indent(next(tokiter).start[1])
	165	except StopIteration:
	166	break
	167
	168	last_indent = prev_indents.pop()
	169
	170	if tokens[-1].type == IN_MULTILINE_STATEMENT:
	171	if tokens[-2].exact_type in {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}:
	172	return last_indent + 4
	173	return last_indent
	174
	175	if tokens[-1].exact_type == tokenize.COLON:
	176	# Line ends with colon - indent
	177	return last_indent + 4
	178
	179	if last_indent:
	180	# Examine the last line for dedent cues - statements like return or
	181	# raise which normally end a block of code.
	182	last_line_starts = 0
	183	for i, tok in enumerate(tokens):
	184	if tok.type == tokenize.NEWLINE:
	185	last_line_starts = i + 1
	186
	187	last_line_tokens = tokens[last_line_starts:]
	188	names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
	189	if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
	190	# Find the most recent indentation less than the current level
	191	for indent in reversed(prev_indents):
	192	if indent < last_indent:
	193	return indent
	194
	195	return last_indent
	196
	197
90	198	def last_blank(src):
91	199	"""Determine if the input source ends in a blank.
92	200
		@@ -306,7 +414,7 b' class InputSplitter(object):'
306	414	if source.endswith('\\\n'):
307	415	return False
308	416
309		self._update_indent(~~lines~~)
	417	self._update_indent()
310	418	try:
311	419	with warnings.catch_warnings():
312	420	warnings.simplefilter('error', SyntaxWarning)
		@@ -427,10 +535,10 b' class InputSplitter(object):'
427	535
428	536	return indent_spaces, full_dedent
429	537
430		def _update_indent(self, ~~lines~~):
431		for line in remove_comments(lines).splitlines():
432		if line and not line.isspace():
433		~~self~~.~~indent_spaces~~, self._full_dedent = self.~~_find_~~indent(~~line~~)
	538	def _update_indent(self):
	539	# self.source always has a trailing newline
	540	self.indent_spaces = find_next_indent(self.source[:-1])
	541	self._full_dedent = (self.indent_spaces == 0)
434	542
435	543	def _store(self, lines, buffer=None, store='source'):
436	544	"""Store one or more lines of input.

IPython/core/tests/test_inputsplitter.py

0 +24 0

		@@ -612,3 +612,27 b' class LineModeCellMagics(CellMagicsCommon, unittest.TestCase):'
612	612	sp.push('\n')
613	613	# In this case, a blank line should end the cell magic
614	614	nt.assert_false(sp.push_accepts_more()) #2
	615
# (code, expected) pairs consumed by test_find_next_indent: `code` is passed
# to find_next_indent and `expected` is the value it must return.
# NOTE(review): whitespace inside these literals looks collapsed in this view
# (e.g. a block body indented by one space with an expected result of 4) -
# confirm the exact spacing against the original file before relying on it.
indentation_samples = [
    ('a = 1', 0),
    ('for a in b:', 4),
    ('def f():', 4),
    ('def f(): #comment', 4),
    ('a = ":#not a comment"', 0),
    ('def f():\n a = 1', 4),
    ('def f():\n return 1', 0),
    ('for a in b:\n'
     ' if a < 0:'
     ' continue', 3),
    ('a = {', 4),
    ('a = {\n'
     ' 1,', 5),
    ('b = """123', 0),
    ('', 0),
]
	633
	634	def test_find_next_indent():
	635	for code, exp in indentation_samples:
	636	res = isp.find_next_indent(code)
	637	msg = "{!r} != {!r} (expected)\n Code: {!r}".format(res, exp, code)
	638	assert res == exp, msg

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages