##// END OF EJS Templates
Calculate indentation based on tokens, not regexes...
Thomas Kluyver -
Show More
@@ -18,8 +18,10 b' For more details, see the class docstrings below.'
18 18 # Distributed under the terms of the Modified BSD License.
19 19 import ast
20 20 import codeop
21 import io
21 22 import re
22 23 import sys
24 import tokenize
23 25 import warnings
24 26
25 27 from IPython.utils.py3compat import cast_unicode
@@ -87,6 +89,112 b' def num_ini_spaces(s):'
87 89 else:
88 90 return 0
89 91
# Fake token types yielded by partial_tokens(). They are numbered from
# tokenize.N_TOKENS upwards to stay clear of the real token type numbers.
INCOMPLETE_STRING = tokenize.N_TOKENS
IN_MULTILINE_STATEMENT = tokenize.N_TOKENS + 1


# The 2 classes below expose the same attributes as tokenize.TokenInfo (type,
# exact_type, string, start, end, line), but don't try to look up a token type
# name that they won't find.
class IncompleteString:
    """Fake token: the input ended inside an unterminated multi-line string."""
    type = exact_type = INCOMPLETE_STRING

    def __init__(self, s, start, end, line):
        # ``s`` is the historical attribute name and is kept for existing
        # users; ``string`` is the name TokenInfo uses for the same value.
        self.s = self.string = s
        self.start = start
        self.end = end
        self.line = line


class InMultilineStatement:
    """Fake token: the input ended inside an unfinished multi-line statement."""
    type = exact_type = IN_MULTILINE_STATEMENT

    def __init__(self, pos, line):
        # This marker has no text of its own and zero width: it starts and
        # ends at the same position.
        self.s = self.string = ''
        self.start = self.end = pos
        self.line = line
112
def partial_tokens(s):
    """Tokenize ``s``, tolerating source code that stops part-way through.

    Every token produced by ``tokenize.generate_tokens`` is passed on
    unchanged. If the tokenizer then reports that the input ended in the
    middle of a construct, one extra fake token is yielded as the final item:

    * INCOMPLETE_STRING - the input ends inside a multi-line string
    * IN_MULTILINE_STATEMENT - the input ends inside a multi-line statement

    Any other ``tokenize.TokenError`` is re-raised unchanged.
    """
    # Placeholder for "the last token seen", used if the tokenizer fails
    # before it has produced a single token.
    last_tok = tokenize.TokenInfo(tokenize.NEWLINE, '', (1, 0), (1, 0), '')
    source_readline = io.StringIO(s).readline
    try:
        # A plain loop (not ``yield from``) so that ``last_tok`` is still
        # bound to the final good token when the error handler runs.
        for last_tok in tokenize.generate_tokens(source_readline):
            yield last_tok
    except tokenize.TokenError as err:
        # The tokenizer hit EOF; its message says what it was in the middle of
        message = err.args[0]
        src_lines = s.splitlines(keepends=True)
        eof_pos = (len(src_lines), len(src_lines[-1]))
        if 'multi-line string' in message:
            # The unfinished string runs from the end of the last good token
            # through to the end of the input.
            string_start = last_tok.end
            row, col = string_start
            unfinished = src_lines[row - 1][col:] + ''.join(src_lines[row:])
            yield IncompleteString(unfinished, string_start, eof_pos,
                                   src_lines[-1])
        elif 'multi-line statement' in message:
            yield InMultilineStatement(eof_pos, src_lines[-1])
        else:
            raise
137
def find_next_indent(code):
    """Find the number of spaces for the next line of indentation.

    Parameters
    ----------
    code : str
        The source entered so far. It may stop in the middle of a statement
        or of a multi-line string.

    Returns
    -------
    int
        The suggested indentation, in spaces, for the line after ``code``.
    """
    tokens = list(partial_tokens(code))
    if tokens and tokens[-1].type == tokenize.ENDMARKER:
        tokens.pop()

    # Strip *every* trailing token that carries no code. The token stream can
    # end with a comment, a NEWLINE and one DEDENT per open block, so removing
    # a single token is not enough to reach the last meaningful one.
    trailing = {tokenize.DEDENT, tokenize.NEWLINE, tokenize.COMMENT}
    while tokens and tokens[-1].type in trailing:
        tokens.pop()
    if not tokens:
        return 0

    if tokens[-1].type == INCOMPLETE_STRING:
        # Inside a multiline string
        return 0

    # Find the indents used before, oldest first; consecutive repeats of the
    # same level are stored only once.
    prev_indents = [0]

    def _add_indent(n):
        if n != prev_indents[-1]:
            prev_indents.append(n)

    tokiter = iter(tokens)
    for tok in tokiter:
        if tok.type in {tokenize.INDENT, tokenize.DEDENT}:
            _add_indent(tok.end[1])
        elif tok.type == tokenize.NL:
            # The token following a non-logical newline begins the next
            # physical line, so its start column is that line's indentation.
            try:
                _add_indent(next(tokiter).start[1])
            except StopIteration:
                break

    last_indent = prev_indents.pop()

    if tokens[-1].type == IN_MULTILINE_STATEMENT:
        # Indent one level further only when the statement was left open
        # directly after an opening bracket. The length check covers input
        # where the fake token is the only token.
        openers = {tokenize.LPAR, tokenize.LSQB, tokenize.LBRACE}
        if len(tokens) > 1 and tokens[-2].exact_type in openers:
            return last_indent + 4
        return last_indent

    if tokens[-1].exact_type == tokenize.COLON:
        # Line ends with colon - indent
        return last_indent + 4

    if last_indent:
        # Examine the last line for dedent cues - statements like return or
        # raise which normally end a block of code.
        last_line_starts = 0
        for i, tok in enumerate(tokens):
            if tok.type == tokenize.NEWLINE:
                last_line_starts = i + 1

        last_line_tokens = tokens[last_line_starts:]
        names = [t.string for t in last_line_tokens if t.type == tokenize.NAME]
        if names and names[0] in {'raise', 'return', 'pass', 'break', 'continue'}:
            # Find the most recent indentation less than the current level
            for indent in reversed(prev_indents):
                if indent < last_indent:
                    return indent

    return last_indent
196
197
90 198 def last_blank(src):
91 199 """Determine if the input source ends in a blank.
92 200
@@ -306,7 +414,7 b' class InputSplitter(object):'
306 414 if source.endswith('\\\n'):
307 415 return False
308 416
309 self._update_indent(lines)
417 self._update_indent()
310 418 try:
311 419 with warnings.catch_warnings():
312 420 warnings.simplefilter('error', SyntaxWarning)
@@ -427,10 +535,10 b' class InputSplitter(object):'
427 535
428 536 return indent_spaces, full_dedent
429 537
430 def _update_indent(self, lines):
431 for line in remove_comments(lines).splitlines():
432 if line and not line.isspace():
433 self.indent_spaces, self._full_dedent = self._find_indent(line)
def _update_indent(self):
    """Recompute the indentation state from the source stored so far."""
    # self.source always has a trailing newline; it is dropped before asking
    # for the indentation of the next line.
    next_indent = find_next_indent(self.source[:-1])
    self.indent_spaces = next_indent
    # A next-line indentation of zero counts as a full dedent
    self._full_dedent = (next_indent == 0)
434 542
435 543 def _store(self, lines, buffer=None, store='source'):
436 544 """Store one or more lines of input.
@@ -612,3 +612,27 b' class LineModeCellMagics(CellMagicsCommon, unittest.TestCase):'
612 612 sp.push('\n')
613 613 # In this case, a blank line should end the cell magic
614 614 nt.assert_false(sp.push_accepts_more()) #2
615
# (code, expected) pairs for test_find_next_indent: ``expected`` is the number
# of spaces find_next_indent() should suggest for the line following ``code``.
# The whitespace inside the strings is significant.
indentation_samples = [
    ('a = 1', 0),
    ('for a in b:', 4),
    ('def f():', 4),
    ('def f(): #comment', 4),
    ('a = ":#not a comment"', 0),
    ('def f():\n    a = 1', 4),
    ('def f():\n    return 1', 0),
    # Blocks indented by 3 and then 7 spaces: 'continue' should dedent to the
    # enclosing level (3), not to a multiple of 4.
    ('for a in b:\n'
     '   if a < 0:\n'
     '       continue', 3),
    ('a = {', 4),
    # Inside brackets, follow the indentation of the previous line
    ('a = {\n'
     '     1,', 5),
    ('b = """123', 0),
    ('', 0),
]
633
def test_find_next_indent():
    """Check find_next_indent against every entry of indentation_samples."""
    for source, expected in indentation_samples:
        actual = isp.find_next_indent(source)
        # On failure, report the computed value, the expected one and the input
        assert actual == expected, (
            "{!r} != {!r} (expected)\n Code: {!r}".format(actual, expected, source)
        )
General Comments 0
You need to be logged in to leave comments. Login now