##// END OF EJS Templates
Use custom CommandCompiler for input validation
Artur Svistunov -
Show More
@@ -1,775 +1,794 b''
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
5
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 deprecated in 7.0.
7 deprecated in 7.0.
8 """
8 """
9
9
10 # Copyright (c) IPython Development Team.
10 # Copyright (c) IPython Development Team.
11 # Distributed under the terms of the Modified BSD License.
11 # Distributed under the terms of the Modified BSD License.
12
12
13 from codeop import compile_command
13 import ast
14 import sys
15 from codeop import CommandCompiler
14 import re
16 import re
15 import tokenize
17 import tokenize
16 from typing import List, Tuple, Optional, Any
18 from typing import List, Tuple, Optional, Any
17 import warnings
19 import warnings
18
20
19 _indent_re = re.compile(r'^[ \t]+')
21 _indent_re = re.compile(r'^[ \t]+')
20
22
def leading_empty_lines(lines):
    """Drop whitespace-only lines from the start of *lines*.

    Returns the input unchanged when it is empty, when the first line
    already has content, or when every line is blank.
    """
    if not lines:
        return lines
    for index, line in enumerate(lines):
        if line and not line.isspace():
            return lines[index:]
    return lines
33
35
def leading_indent(lines):
    """Remove the first line's leading indentation from every line.

    If the first line begins with spaces or tabs, that exact whitespace
    prefix is stripped from each line of the cell that shares it; lines
    that do not share the prefix are left untouched.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if not match:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    return [line[width:] if line.startswith(prefix) else line
            for line in lines]
49
51
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        Matches any input prompt, including continuation prompts
        (e.g. ``...``).
    initial_re : regular expression, optional
        Matches only the initial prompt, not continuation.  Defaults to
        ``prompt_re``.  Useful for plain Python prompts (``>>>``), where
        the ``...`` continuation is itself valid Python and so must not
        be stripped on the first line.

    Notes
    -----
    Only ``initial_re`` is tested against the first line.  If any prompt
    is found on either of the first two lines, prompts are stripped from
    the whole block.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re if initial_re is not None else prompt_re

    def _strip(self, lines):
        # Remove at most one prompt occurrence per physical line.
        return [self.prompt_re.sub('', line, count=1) for line in lines]

    def __call__(self, lines):
        if not lines:
            return lines
        first_prompted = bool(self.initial_re.match(lines[0]))
        second_prompted = len(lines) > 1 and bool(self.prompt_re.match(lines[1]))
        if first_prompted or second_prompted:
            return self._strip(lines)
        return lines
86
88
# Strippers for the two prompt styles recognised when code is pasted:
# the classic Python REPL and IPython's own "In [n]:" prompts.

classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)'),
)

ipython_prompt = PromptStripper(
    re.compile(
        r"""
        ^( # Match from the beginning of a line, either:

        # 1. First-line prompt:
        ((\[nav\]|\[ins\])?\ )? # Vi editing mode prompt, if it's there
        In\ # The 'In' of the prompt, with a space
        \[\d+\]: # Command index, as displayed in the prompt
        \ # With a mandatory trailing space

        | # ... or ...

        # 2. The three dots of the multiline prompt
        \s* # All leading whitespace characters
        \.{3,}: # The three (or more) dots
        \ ? # With an optional trailing space

        )
        """,
        re.VERBOSE,
    )
)
115
117
116
118
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell into a ``run_cell_magic`` call.

    Returns *lines* untouched unless the first line opens a cell magic.
    ``%%magic?`` is deliberately left alone: help_end handles it.
    """
    if not lines:
        return lines
    first = lines[0]
    if not first.startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', first):
        # This case will be handled by help_end
        return lines
    magic_name, _, first_line = first[2:].rstrip().partition(' ')
    body = ''.join(lines[1:])
    call = 'get_ipython().run_cell_magic({!r}, {!r}, {!r})\n'.format(
        magic_name, first_line, body)
    return [call]
127
129
128
130
129 def _find_assign_op(token_line) -> Optional[int]:
131 def _find_assign_op(token_line) -> Optional[int]:
130 """Get the index of the first assignment in the line ('=' not inside brackets)
132 """Get the index of the first assignment in the line ('=' not inside brackets)
131
133
132 Note: We don't try to support multiple special assignment (a = b = %foo)
134 Note: We don't try to support multiple special assignment (a = b = %foo)
133 """
135 """
134 paren_level = 0
136 paren_level = 0
135 for i, ti in enumerate(token_line):
137 for i, ti in enumerate(token_line):
136 s = ti.string
138 s = ti.string
137 if s == '=' and paren_level == 0:
139 if s == '=' and paren_level == 0:
138 return i
140 return i
139 if s in {'(','[','{'}:
141 if s in {'(','[','{'}:
140 paren_level += 1
142 paren_level += 1
141 elif s in {')', ']', '}'}:
143 elif s in {')', ']', '}'}:
142 if paren_level > 0:
144 if paren_level > 0:
143 paren_level -= 1
145 paren_level -= 1
144 return None
146 return None
145
147
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a statement explicitly extended with backslashes.

    Uses 0-indexed line numbers.  Advances past every line ending in
    ``\\`` + newline, stopping at the first line that does not (or at the
    end of *lines*).
    """
    current = start_line
    while lines[current].endswith('\\\n'):
        current += 1
        if current >= len(lines):
            break
    return current
157
159
158 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
160 def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
159 r"""Assemble a single line from multiple continued line pieces
161 r"""Assemble a single line from multiple continued line pieces
160
162
161 Continued lines are lines ending in ``\``, and the line following the last
163 Continued lines are lines ending in ``\``, and the line following the last
162 ``\`` in the block.
164 ``\`` in the block.
163
165
164 For example, this code continues over multiple lines::
166 For example, this code continues over multiple lines::
165
167
166 if (assign_ix is not None) \
168 if (assign_ix is not None) \
167 and (len(line) >= assign_ix + 2) \
169 and (len(line) >= assign_ix + 2) \
168 and (line[assign_ix+1].string == '%') \
170 and (line[assign_ix+1].string == '%') \
169 and (line[assign_ix+2].type == tokenize.NAME):
171 and (line[assign_ix+2].type == tokenize.NAME):
170
172
171 This statement contains four continued line pieces.
173 This statement contains four continued line pieces.
172 Assembling these pieces into a single line would give::
174 Assembling these pieces into a single line would give::
173
175
174 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
176 if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...
175
177
176 This uses 0-indexed line numbers. *start* is (lineno, colno).
178 This uses 0-indexed line numbers. *start* is (lineno, colno).
177
179
178 Used to allow ``%magic`` and ``!system`` commands to be continued over
180 Used to allow ``%magic`` and ``!system`` commands to be continued over
179 multiple lines.
181 multiple lines.
180 """
182 """
181 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
183 parts = [lines[start[0]][start[1]:]] + lines[start[0]+1:end_line+1]
182 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
184 return ' '.join([p.rstrip()[:-1] for p in parts[:-1]] # Strip backslash+newline
183 + [parts[-1].rstrip()]) # Strip newline from last line
185 + [parts[-1].rstrip()]) # Strip newline from last line
184
186
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings
    or comments, which is hard to avoid reliably with regexes.  Instead the
    code is tokenised as Python and the special syntax recognised in the
    tokens.

    IPython's special syntax is not valid Python, so tokenising may go
    wrong after the special syntax starts.  Subclasses therefore find and
    transform *one* instance of special syntax at a time into regular
    Python; tokens are regenerated after each transformation to find the
    next piece.

    Subclasses must implement the ``find`` classmethod and the
    ``transform`` method.  The ``priority`` attribute breaks ties when
    several transformers match at the same location — lower numbers win,
    which is how ``%magic?`` becomes a help call rather than a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def sortby(self):
        # Sort key: earliest position first, priority as the tiebreaker.
        return self.start_line, self.start_col, self.priority

    def __init__(self, start):
        # *start* is a tokenize-style (1-indexed line, column) pair.
        self.start_line = start[0] - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience, so it is
        easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        Returns an instance of the class pointing to the start position
        found, or None when there is no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``.

        Takes a list of strings representing physical lines; returns a
        similar list of transformed lines.
        """
        raise NotImplementedError
235
237
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # A magic assignment is '=' followed by a '%' token and then a
            # NAME token.  We index both assign_ix+1 and assign_ix+2, so
            # the line must have at least assign_ix+3 tokens (the original
            # bound of assign_ix+2 could IndexError on a line ending right
            # after the '%').
            if (assign_ix is not None) \
                    and (len(line) >= assign_ix + 3) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        Rewrites ``a = %foo args`` (possibly backslash-continued) into
        ``a = get_ipython().run_line_magic('foo', 'args')``.
        """
        start_line, start_col = self.start_line, self.start_col
        # Everything before the '%': the assignment target and the '='.
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
266
268
267
269
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Candidate: a top-level '=' whose physical line does not itself
            # start with '=', followed by an ERRORTOKEN ('!' is not valid
            # Python, so the tokenizer emits ERRORTOKENs from there on).
            if (assign_ix is not None) \
                    and not line[assign_ix].line.strip().startswith('=') \
                    and (len(line) >= assign_ix + 2) \
                    and (line[assign_ix + 1].type == tokenize.ERRORTOKEN):
                ix = assign_ix + 1

                # Scan forward over ERRORTOKENs: whitespace ERRORTOKENs are
                # skipped; the first '!' confirms a system assignment; any
                # other non-space ERRORTOKEN means this is not one.
                while ix < len(line) and line[ix].type == tokenize.ERRORTOKEN:
                    if line[ix].string == '!':
                        return cls(line[ix].start)
                    elif not line[ix].string.isspace():
                        break
                    ix += 1

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        # Everything before the '!': the assignment target and the '='.
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('!'), rhs
        cmd = rhs[1:]

        lines_before = lines[:start_line]
        # getoutput() runs the command and captures its output for the
        # assignment target.
        call = "get_ipython().getoutput({!r})".format(cmd)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
306
308
# The escape sequences that define the syntax transformations IPython will
# apply to user input. These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values,
# and for all intents and purposes they constitute the 'IPython syntax',
# so they should be considered fixed.

ESC_SHELL = '!'    # Send line to underlying system shell
ESC_SH_CAP = '!!'  # Send line to system shell and capture output
ESC_HELP = '?'     # Find information about object
ESC_HELP2 = '??'   # Find extra-detailed information about object
ESC_MAGIC = '%'    # Call magic function
ESC_MAGIC2 = '%%'  # Call cell-magic function
ESC_QUOTE = ','    # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'   # Quote all args as a single string, call
ESC_PAREN = '/'    # Call first argument with rest of line as arguments

# One- and two-character escapes recognised by EscapedCommand.
ESCAPE_SINGLES = {'!', '?', '%', ',', ';', '/'}
ESCAPE_DOUBLES = {'!!', '??'}  # %% (cell magic) is handled separately
325
327
def _make_help_call(target, esc, next_input=None):
    """Prepare a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??).

    If *next_input* is given, the generated code also queues it as the
    next input via get_ipython().set_next_input().
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'
    arg = " ".join([method, target])
    # Prepare arguments for get_ipython().run_line_magic(magic_name, magic_args)
    t_magic_name, _, t_magic_arg_s = arg.partition(' ')
    t_magic_name = t_magic_name.lstrip(ESC_MAGIC)
    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (t_magic_name, t_magic_arg_s)
    return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
        (next_input, t_magic_name, t_magic_arg_s)
341
343
def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line fires the intro help screen (shell.show_usage()).
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
351
353
def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line fires the intro help screen (shell.show_usage()).
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
361
363
362 def _tr_magic(content):
364 def _tr_magic(content):
363 "Translate lines escaped with a percent sign: %"
365 "Translate lines escaped with a percent sign: %"
364 name, _, args = content.partition(' ')
366 name, _, args = content.partition(' ')
365 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
367 return 'get_ipython().run_line_magic(%r, %r)' % (name, args)
366
368
367 def _tr_quote(content):
369 def _tr_quote(content):
368 "Translate lines escaped with a comma: ,"
370 "Translate lines escaped with a comma: ,"
369 name, _, args = content.partition(' ')
371 name, _, args = content.partition(' ')
370 return '%s("%s")' % (name, '", "'.join(args.split()) )
372 return '%s("%s")' % (name, '", "'.join(args.split()) )
371
373
372 def _tr_quote2(content):
374 def _tr_quote2(content):
373 "Translate lines escaped with a semicolon: ;"
375 "Translate lines escaped with a semicolon: ;"
374 name, _, args = content.partition(' ')
376 name, _, args = content.partition(' ')
375 return '%s("%s")' % (name, args)
377 return '%s("%s")' % (name, args)
376
378
377 def _tr_paren(content):
379 def _tr_paren(content):
378 "Translate lines escaped with a slash: /"
380 "Translate lines escaped with a slash: /"
379 name, _, args = content.partition(' ')
381 name, _, args = content.partition(' ')
380 return '%s(%s)' % (name, ", ".join(args.split()))
382 return '%s(%s)' % (name, ", ".join(args.split()))
381
383
# Dispatch table mapping each escape character to the callable that
# rewrites the escaped content into Python source (either a bound
# str.format method or one of the _tr_* helpers above).
tr = { ESC_SHELL : 'get_ipython().system({!r})'.format,
       ESC_SH_CAP : 'get_ipython().getoutput({!r})'.format,
       ESC_HELP : _tr_help,
       ESC_HELP2 : _tr_help2,
       ESC_MAGIC : _tr_magic,
       ESC_QUOTE : _tr_quote,
       ESC_QUOTE2 : _tr_quote2,
       ESC_PAREN : _tr_paren }
390
392
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.
        """
        for line in tokens_by_line:
            if not line:
                continue
            # Skip over leading INDENT/DEDENT tokens to reach the first
            # real token of the logical line.
            ix = 0
            ll = len(line)
            while ll > ix and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= ll:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.
        """
        start_line, start_col = self.start_line, self.start_col

        # Preserve whatever precedes the escape on the line (indentation).
        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Two-character escapes ('!!', '??') take precedence over their
        # single-character counterparts.
        if len(line) > 1 and line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]

        if escape in tr:
            call = tr[escape](content)
        else:
            # Unknown escape: replace the line with an empty statement.
            call = ''

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
433
435
# Matches a (possibly %-prefixed, possibly dotted) name followed by a
# trailing '?' or '??' help request at the end of the text.
_help_end_re = re.compile(r"""(%{0,2}
                              (?!\d)[\w*]+ # Variable name
                              (\.(?!\d)[\w*]+)* # .etc.etc
                              )
                              (\?\??)$ # ? or ??
                              """,
                              re.VERBOSE)
441
443
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        """*start* is where the target name begins; *q_locn* where the '?'
        token is.  Both are tokenize-style (1-indexed line, column) pairs.
        """
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Raises SyntaxError when the text before the '?' is not a
        recognisable (dotted) name.
        """
        # Reassemble the text from the start of the target through the line
        # holding the '?', then split off the leading indentation.
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        # NOTE: the original also had ``assert m is not None`` here, which
        # was unreachable after the raise above (and stripped under -O);
        # it has been removed as dead code.
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
491
493
def make_tokens_by_line(lines: List[str]):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are grouped as one
    line. All lines except the last lines should keep their line ending ('\\n',
    '\\r\\n') for this to properly work. Use `.splitlines(keeplineending=True)`
    for example when passing block of text to this function.
    """
    # NL tokens appear inside multiline expressions, but also after blank
    # lines or comments (see https://bugs.python.org/issue17061). We want
    # to keep the former grouped but split on the latter, so we track
    # bracket depth much like tokenize does internally.

    # reexported from token on 3.7+
    NEWLINE, NL = tokenize.NEWLINE, tokenize.NL  # type: ignore
    grouped: List[List[Any]] = [[]]
    if len(lines) > 1 and not lines[0].endswith(('\n', '\r', '\r\n', '\x0b', '\x0c')):
        warnings.warn("`make_tokens_by_line` received a list of lines which do not have lineending markers ('\\n', '\\r', '\\r\\n', '\\x0b', '\\x0c'), behavior will be unspecified")
    depth = 0
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            grouped[-1].append(tok)
            if tok.type == NEWLINE or (tok.type == NL and depth <= 0):
                grouped.append([])
            elif tok.string in {'(', '[', '{'}:
                depth += 1
            elif tok.string in {')', ']', '}'} and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    # Drop the trailing empty group left after a final NEWLINE/NL.
    if not grouped[-1]:
        grouped.pop()

    return grouped
533
535
534
536
def has_sunken_brackets(tokens: List[tokenize.TokenInfo]):
    """Check if the depth of brackets in the list of tokens drops below 0"""
    openers = {"(", "[", "{"}
    closers = {")", "]", "}"}
    depth = 0
    for tok in tokens:
        if tok.string in openers:
            depth += 1
        elif tok.string in closers:
            depth -= 1
            # Depth can only go negative immediately after a closer.
            if depth < 0:
                return True
    return False
546
548
547
549
def show_linewise_tokens(s: str):
    """For investigation and debugging"""
    if not s.endswith('\n'):
        s += '\n'
    for group in make_tokens_by_line(s.splitlines(keepends=True)):
        print("Line -------")
        for tokinfo in group:
            print(" ", tokinfo)
557
559
# Arbitrary limit to prevent getting stuck in infinite loops
TRANSFORM_LOOP_LIMIT = 500
560
562
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """

    def __init__(self):
        # Run first: undo artefacts of copy/pasted input (blank lines,
        # indentation, '>>> ' and 'In [n]:' prompts).
        self.cleanup_transforms = [
            leading_empty_lines,
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Whole-cell, line-based transforms (e.g. %%cell magics).
        self.line_transforms = [
            cell_magic,
        ]
        # Token-based transformers for IPython special syntax; each class
        # provides ``find()`` and ``transform()``.
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines
        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This transformer didn't apply cleanly; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms repeatedly until the input stops changing.

        Raises RuntimeError if the input is still changing after
        TRANSFORM_LOOP_LIMIT iterations (a safety net against loops).
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : str
            Python input code, which can be multiline.

        Returns
        -------
        status : str
            One of 'complete', 'incomplete', or 'invalid' if source is not a
            prefix of valid code.
        indent_spaces : int or None
            The number of spaces by which to indent the next line of code. If
            status is not 'incomplete', this is None.
        """
        # Remember if the lines ends in a new line.
        ends_with_newline = False
        for character in reversed(cell):
            if character == '\n':
                ends_with_newline = True
                break
            elif character.strip():
                # First non-whitespace character is not a newline.
                break

        if not ends_with_newline:
            # Append an newline for consistent tokenization
            # See https://bugs.python.org/issue33899
            cell += '\n'

        lines = cell.splitlines(keepends=True)

        if not lines:
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                if not getattr(transform, 'has_side_effects', False):
                    lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)

        # Bail if we got one line and there are more closing parentheses than
        # the opening ones
        if (
            len(lines) == 1
            and tokens_by_line
            and has_sunken_brackets(tokens_by_line[0])
        ):
            return "invalid", None

        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        newline_types = {tokenize.NEWLINE, tokenize.COMMENT, tokenize.ENDMARKER}  # type: ignore

        # Pop the last line which only contains DEDENTs and ENDMARKER
        last_token_line = None
        if {t.type for t in tokens_by_line[-1]} in [
            {tokenize.DEDENT, tokenize.ENDMARKER},
            {tokenize.ENDMARKER}
        ] and len(tokens_by_line) > 1:
            last_token_line = tokens_by_line.pop()

        while tokens_by_line[-1] and tokens_by_line[-1][-1].type in newline_types:
            tokens_by_line[-1].pop()

        if not tokens_by_line[-1]:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while tokens_by_line[-1][ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1

            indent = tokens_by_line[-1][ix].start[1]
            return 'incomplete', indent + 4

        if tokens_by_line[-1][0].line.endswith('\\'):
            return 'incomplete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                return 'incomplete', find_last_indent(lines)

        if last_token_line and last_token_line[0].type == tokenize.DEDENT:
            if ends_with_newline:
                return 'complete', None
            return 'incomplete', find_last_indent(lines)

        # Anything else (including a trailing blank line, which two previously
        # duplicated branches both treated as complete) is ready to execute.
        return 'complete', None
769
771
770
772
def find_last_indent(lines):
    """Return the indentation width of the last line, tabs counted as 4."""
    match = _indent_re.match(lines[-1])
    if match is None:
        return 0
    return len(match.group(0).replace('\t', '    '))
778
779
class MaybeAsyncCommandCompiler(CommandCompiler):
    """A ``codeop.CommandCompiler`` that ORs extra flags into every compile.

    Used so input validation can accept syntax enabled by compiler flags,
    e.g. top-level ``await`` via ``ast.PyCF_ALLOW_TOP_LEVEL_AWAIT``.
    """

    def __init__(self, extra_flags=0):
        # Deliberately does not call CommandCompiler.__init__: we replace
        # the stock Compile instance with our own compile wrapper.
        self.compiler = self._compiler
        self.extra_flags = extra_flags

    def _compiler(self, source, filename, symbol, flags=0, feature=False, **kwargs):
        """Compile ``source`` with ``self.extra_flags`` OR-ed in.

        codeop's ``_maybe_compile`` invokes ``self.compiler(source, filename,
        symbol)`` with only three positional arguments, so ``flags`` and
        ``feature`` (compile()'s ``dont_inherit``) must have defaults — the
        original signature without defaults raised TypeError on every call.
        Extra keyword arguments passed by newer codeop versions (e.g.
        ``incomplete_input``) are accepted and ignored.
        """
        flags |= self.extra_flags
        return compile(source, filename, symbol, flags, feature)
788
# Choose the extra compile flags for input validation. On 3.8+ this allows
# top-level ``await``; on older versions PyCF_ONLY_AST is used instead
# (presumably so validation still parses without the await support —
# NOTE(review): confirm the AST-only fallback is intended on < 3.8).
if sys.version_info >= (3, 8):
    _extra_flags = ast.PyCF_ALLOW_TOP_LEVEL_AWAIT
else:
    _extra_flags = ast.PyCF_ONLY_AST

compile_command = MaybeAsyncCommandCompiler(extra_flags=_extra_flags)
General Comments 0
You need to be logged in to leave comments. Login now