##// END OF EJS Templates
Prevent infinite loops in input transformation
Thomas Kluyver -
Show More
@@ -1,558 +1,564 b''
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5 """
5 """
6
6
7 # Copyright (c) IPython Development Team.
7 # Copyright (c) IPython Development Team.
8 # Distributed under the terms of the Modified BSD License.
8 # Distributed under the terms of the Modified BSD License.
9
9
10 from codeop import compile_command
10 from codeop import compile_command
11 import re
11 import re
12 import tokenize
12 import tokenize
13 from typing import List, Tuple
13 from typing import List, Tuple
14 import warnings
14 import warnings
15
15
_indent_re = re.compile(r'^[ \t]+')


def leading_indent(lines):
    """Remove leading indentation.

    If the first line starts with spaces or tabs, the same whitespace will be
    removed from each following line that also starts with it.  Lines that do
    not start with that exact whitespace are left untouched.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.  May be empty.

    Returns
    -------
    list of str
        ``lines`` itself when there is nothing to strip, otherwise a new list.
    """
    if not lines:
        # An empty cell has no first line to inspect.
        return lines
    m = _indent_re.match(lines[0])
    if not m:
        return lines
    space = m.group(0)
    n = len(space)
    return [l[n:] if l.startswith(space) else l
            for l in lines]
31
31
class PromptStripper:
    """Remove matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including continuation)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not continuation.
        If no initial expression is given, prompt_re will be used everywhere.
        Used mainly for plain Python prompts, where the continuation prompt
        ``...`` is a valid Python expression in Python 3, so shouldn't be stripped.

    If initial_re and prompt_re differ,
    only initial_re will be tested against the first line.
    If any prompt is found on the first two lines,
    prompts will be stripped from the rest of the block.
    """
    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Remove at most one leading prompt match from every line."""
        return [self.prompt_re.sub('', l, count=1) for l in lines]

    def __call__(self, lines):
        """Return *lines* with prompts stripped if the block starts with one.

        An empty list is returned unchanged.
        """
        if not lines:
            # Nothing to inspect; avoids indexing lines[0] on an empty cell.
            return lines
        if self.initial_re.match(lines[0]) or \
                (len(lines) > 1 and self.prompt_re.match(lines[1])):
            return self._strip(lines)
        return lines


# Plain Python prompts: '>>> ' first, '... ' for continuation lines.
classic_prompt = PromptStripper(
    prompt_re=re.compile(r'^(>>>|\.\.\.)( |$)'),
    initial_re=re.compile(r'^>>>( |$)')
)

# IPython prompts: 'In [N]: ' and '...: ' continuation lines.
ipython_prompt = PromptStripper(re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'))
69
69
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        The lines of the cell.  May be empty.

    Returns
    -------
    list of str
        *lines* unchanged if the cell does not start with ``%%`` (or is a
        ``%%name?`` help request), otherwise a one-element list holding the
        generated call.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # Drop the leading '%%' and the trailing newline, if there is one.  Slicing
    # with [2:-1] would eat a real character when the newline is missing.
    header = lines[0][2:]
    if header.endswith('\n'):
        header = header[:-1]
    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
80
80
81 # -----
81 # -----
82
82
def _find_assign_op(token_line):
    """Get the index of the first assignment in the line ('=' not inside brackets).

    We don't try to support multiple special assignment (a = b = %foo).

    Parameters
    ----------
    token_line : sequence
        Tokens of one logical line; only the ``string`` attribute is read.

    Returns
    -------
    int or None
        Index of the first top-level ``=`` token, or None if there is none.
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        # Use set membership, not a substring test: tokens with an empty
        # string (e.g. DEDENT) satisfy ``'' in '([{'`` and would wrongly be
        # counted as an opening bracket.
        if s in {'(', '[', '{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            if paren_level > 0:
                paren_level -= 1
    return None
96
96
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Uses 0-indexed line numbers.

    Parameters
    ----------
    lines : list of str
        Lines of the cell, each keeping its trailing newline.
    start_line : int
        Index of the line on which the continued statement starts.

    Returns
    -------
    int
        Index of the last line belonging to the continued statement.  The
        result is always a valid index into *lines*, even when the final line
        of the cell itself ends with a backslash.
    """
    end_line = start_line
    # Only advance while there is a following line to advance to, so the
    # returned index never points past the end of the list.
    while lines[end_line].endswith('\\\n') and end_line + 1 < len(lines):
        end_line += 1
    return end_line
108
108
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    """Join the pieces of a backslash-continued line into one string.

    Line numbers are 0-indexed and *start* is ``(lineno, colno)``.  The two
    trailing characters (backslash and newline) are cut from every piece but
    the last, which loses only its final character; the pieces are then
    joined with single spaces.
    """
    first_row, first_col = start[0], start[1]
    pieces = [lines[first_row][first_col:]] + lines[first_row + 1:end_line + 1]
    continued, final = pieces[:-1], pieces[-1]
    trimmed = [piece[:-2] for piece in continued]  # Strip backslash+newline
    trimmed.append(final[:-1])                     # Strip newline from last line
    return ' '.join(trimmed)
117
117
class TokenTransformBase:
    """Base class for transformations that are located by a start position.

    Instances store the 0-indexed line and the column at which the special
    syntax starts; subclasses implement :meth:`transform`.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        # *start* carries a 1-indexed line number; store it 0-indexed.
        self.start_line = start[0] - 1
        self.start_col = start[1]

    def sortby(self):
        """Return the sort key: (start line, start column, priority)."""
        return (self.start_line, self.start_col, self.priority)

    def transform(self, lines: List[str]):
        """Return the transformed lines; must be provided by subclasses."""
        raise NotImplementedError
131
131
class MagicAssign(TokenTransformBase):
    """Transformer for magic assignments (``a = %foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns a transformer positioned at the ``%`` if one is found, or
        None.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after the '=' are inspected below ('%' and the magic
            # name), so the line must extend at least that far.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by find.

        Replaces the (possibly backslash-continued) statement with a single
        line that assigns the result of ``run_line_magic``.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
163
163
164
164
class SystemAssign(TokenTransformBase):
    """Transformer for system assignments (``a = !foo``)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        Returns a transformer positioned at the ``!`` if one is found, or
        None.
        """
        for tokens in tokens_by_line:
            eq_pos = _find_assign_op(tokens)
            if eq_pos is None:
                continue
            if len(tokens) < eq_pos + 2:
                continue
            if tokens[eq_pos + 1].type != tokenize.ERRORTOKEN:
                continue

            # Walk the run of error tokens after the '='; whitespace may
            # precede the '!', anything else ends the search on this line.
            pos = eq_pos + 1
            while pos < len(tokens) and tokens[pos].type == tokenize.ERRORTOKEN:
                text = tokens[pos].string
                if text == '!':
                    return cls(tokens[pos].start)
                if not text.isspace():
                    break
                pos += 1
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by find.

        Replaces the (possibly backslash-continued) statement with a single
        line that assigns the result of ``getoutput``.
        """
        row, col = self.start_line, self.start_col

        prefix = lines[row][:col]
        last_row = find_end_of_continued_line(lines, row)
        command = assemble_continued_line(lines, (row, col), last_row)
        assert command.startswith('!'), command

        call = "get_ipython().getoutput({!r})".format(command[1:])
        return lines[:row] + [prefix + call + '\n'] + lines[last_row + 1:]
203
203
# The escape sequences that define the syntax transformations IPython will
# apply to user input.  These can NOT be just changed here: many regular
# expressions and other parts of the code may use their hardcoded values, and
# for all intents and purposes they constitute the 'IPython syntax', so they
# should be considered fixed.

# Send line to underlying system shell
ESC_SHELL = '!'
# Send line to system shell and capture output
ESC_SH_CAP = '!!'
# Find information about object
ESC_HELP = '?'
# Find extra-detailed information about object
ESC_HELP2 = '??'
# Call magic function
ESC_MAGIC = '%'
# Call cell-magic function
ESC_MAGIC2 = '%%'
# Split args on whitespace, quote each as string and call
ESC_QUOTE = ','
# Quote all args as a single string, call
ESC_QUOTE2 = ';'
# Call first argument with rest of line as arguments
ESC_PAREN = '/'

ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
222
222
def _make_help_call(target, esc, next_input=None):
    """Build the source for a pinfo(2)/psearch call.

    *target* is the name being queried and *esc* the escape used (``?`` or
    ``??``).  ``??`` selects ``pinfo2``; otherwise a ``*`` in the target
    selects ``psearch`` and anything else ``pinfo``.  When *next_input* is
    given, a ``set_next_input`` call is emitted in front of the magic call.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'

    # Arguments for get_ipython().run_line_magic(magic_name, magic_args)
    if next_input is not None:
        return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
            (next_input, method, target)
    return 'get_ipython().run_line_magic(%r, %r)' % (method, target)
238
238
def _tr_help(content):
    """Translate lines escaped with: ?"""
    if content:
        return _make_help_call(content, '?')
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
246
246
def _tr_help2(content):
    """Translate lines escaped with: ??"""
    if content:
        return _make_help_call(content, '??')
    # A naked help line should just fire the intro help screen
    return 'get_ipython().show_usage()'
254
254
def _tr_magic(content):
    """Translate lines escaped with: %

    The text up to the first space is the magic name; the rest is passed
    through as its argument string.
    """
    pieces = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (pieces[0], pieces[2])
259
259
def _tr_quote(content):
    """Translate lines escaped with: ,

    The first word is the callable; every following whitespace-separated
    word becomes its own double-quoted argument.
    """
    func, _sep, rest = content.partition(' ')
    quoted_args = '", "'.join(rest.split())
    return '%s("%s")' % (func, quoted_args)
264
264
def _tr_quote2(content):
    """Translate lines escaped with: ;

    The first word is the callable; everything after the first space is
    passed as one double-quoted argument.
    """
    pieces = content.partition(' ')
    return '%s("%s")' % (pieces[0], pieces[2])
269
269
def _tr_paren(content):
    """Translate lines escaped with: /

    The first word is the callable; the following whitespace-separated words
    become its comma-separated arguments.
    """
    func, _sep, rest = content.partition(' ')
    arg_list = ", ".join(rest.split())
    return '%s(%s)' % (func, arg_list)
274
274
# Maps each escape sequence to the callable that turns the rest of the line
# into Python source.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
283
283
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands (%foo, !foo, etc.)."""

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Returns a transformer positioned at the escape if one is found, or
        None.
        """
        for line in tokens_by_line:
            ix = 0
            n_tokens = len(line)
            # Skip leading INDENT/DEDENT tokens without running off the end:
            # a token group can be empty, or hold nothing but these tokens.
            while ix < n_tokens \
                    and line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            if ix >= n_tokens:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Transform an escaped command found by find.

        Replaces the (possibly backslash-continued) command with a single
        line of Python produced by the handler registered in ``tr``.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Two-character escapes (!!, ??) take precedence over single ones.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]
        call = tr[escape](content)

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
316
316
_help_end_re = re.compile(r"""(%{0,2}
                              [a-zA-Z_*][\w*]*        # Variable name
                              (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
                              )
                              (\?\??)$                # ? or ??
                              """,
                              re.VERBOSE)


class HelpEnd(TokenTransformBase):
    """Transformer for help requests written at the end of a line (``foo?``)."""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first line whose last token before the newline is ``?``.

        Returns a transformer for that line, or None.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)
        return None

    def transform(self, lines):
        """Replace the help request with a pinfo/pinfo2/psearch call.

        Raises
        ------
        SyntaxError
            If no name can be recognised in front of the trailing ``?``.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if m is None:
            # The text comes straight from the user, so report it as a syntax
            # problem instead of relying on an assert (stripped under -O).
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
367
367
def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    grouped = [[]]
    depth = 0
    readline = iter(lines).__next__
    try:
        for tok in tokenize.generate_tokens(readline):
            grouped[-1].append(tok)
            kind = tok.type
            if kind == tokenize.NEWLINE or (kind == tokenize.NL and depth <= 0):
                # End of a logical line: start collecting the next group.
                grouped.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass

    return grouped
397
397
def show_linewise_tokens(s: str):
    """For investigation: print the tokens of *s*, grouped by line."""
    text = s if s.endswith('\n') else s + '\n'
    for group in make_tokens_by_line(text.splitlines(keepends=True)):
        print("Line -------")
        for tok in group:
            print(" ", tok)
407
407
408 # Arbitrary limit to prevent getting stuck in infinite loops
409 TRANSFORM_LOOP_LIMIT = 500
410
408 class TransformerManager:
411 class TransformerManager:
409 def __init__(self):
412 def __init__(self):
410 self.cleanup_transforms = [
413 self.cleanup_transforms = [
411 leading_indent,
414 leading_indent,
412 classic_prompt,
415 classic_prompt,
413 ipython_prompt,
416 ipython_prompt,
414 ]
417 ]
415 self.line_transforms = [
418 self.line_transforms = [
416 cell_magic,
419 cell_magic,
417 ]
420 ]
418 self.token_transformers = [
421 self.token_transformers = [
419 MagicAssign,
422 MagicAssign,
420 SystemAssign,
423 SystemAssign,
421 EscapedCommand,
424 EscapedCommand,
422 HelpEnd,
425 HelpEnd,
423 ]
426 ]
424
427
425 def do_one_token_transform(self, lines):
428 def do_one_token_transform(self, lines):
426 """Find and run the transform earliest in the code.
429 """Find and run the transform earliest in the code.
427
430
428 Returns (changed, lines).
431 Returns (changed, lines).
429
432
430 This method is called repeatedly until changed is False, indicating
433 This method is called repeatedly until changed is False, indicating
431 that all available transformations are complete.
434 that all available transformations are complete.
432
435
433 The tokens following IPython special syntax might not be valid, so
436 The tokens following IPython special syntax might not be valid, so
434 the transformed code is retokenised every time to identify the next
437 the transformed code is retokenised every time to identify the next
435 piece of special syntax. Hopefully long code cells are mostly valid
438 piece of special syntax. Hopefully long code cells are mostly valid
436 Python, not using lots of IPython special syntax, so this shouldn't be
439 Python, not using lots of IPython special syntax, so this shouldn't be
437 a performance issue.
440 a performance issue.
438 """
441 """
439 tokens_by_line = make_tokens_by_line(lines)
442 tokens_by_line = make_tokens_by_line(lines)
440 candidates = []
443 candidates = []
441 for transformer_cls in self.token_transformers:
444 for transformer_cls in self.token_transformers:
442 transformer = transformer_cls.find(tokens_by_line)
445 transformer = transformer_cls.find(tokens_by_line)
443 if transformer:
446 if transformer:
444 candidates.append(transformer)
447 candidates.append(transformer)
445
448
446 if not candidates:
449 if not candidates:
447 # Nothing to transform
450 # Nothing to transform
448 return False, lines
451 return False, lines
449
452
450 transformer = min(candidates, key=TokenTransformBase.sortby)
453 transformer = min(candidates, key=TokenTransformBase.sortby)
451 return True, transformer.transform(lines)
454 return True, transformer.transform(lines)
452
455
453 def do_token_transforms(self, lines):
456 def do_token_transforms(self, lines):
454 while True:
457 for _ in range(TRANSFORM_LOOP_LIMIT):
455 changed, lines = self.do_one_token_transform(lines)
458 changed, lines = self.do_one_token_transform(lines)
456 if not changed:
459 if not changed:
457 return lines
460 return lines
458
461
462 raise RuntimeError("Input transformation still changing after "
463 "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)
464
def transform_cell(self, cell: str) -> str:
    """Transform an input cell and return the result as a single string.

    The cell is split into lines (keeping line endings), passed through
    every callable in ``self.cleanup_transforms`` and then
    ``self.line_transforms`` in order, and finally through
    ``self.do_token_transforms``. The resulting lines are joined back
    together.

    Parameters
    ----------
    cell : str
      Input code, which can be multiline. A trailing newline is added if
      it is missing.

    Returns
    -------
    str
      The transformed lines joined into one string.
    """
    if not cell.endswith('\n'):
        cell += '\n'  # Ensure the cell has a trailing newline
    lines = cell.splitlines(keepends=True)
    for transform in self.cleanup_transforms + self.line_transforms:
        lines = transform(lines)

    lines = self.do_token_transforms(lines)
    return ''.join(lines)
469
475
def check_complete(self, cell: str):
    """Return whether a block of code is ready to execute, or should be continued

    Parameters
    ----------
    cell : string
      Python input code, which can be multiline.

    Returns
    -------
    status : str
      One of 'complete', 'incomplete', or 'invalid' if the cell is not a
      prefix of valid code.
    indent_spaces : int or None
      The number of spaces by which to indent the next line of code. If
      status is not 'incomplete', this is None.
    """
    if not cell.endswith('\n'):
        cell += '\n'  # Ensure the cell has a trailing newline
    lines = cell.splitlines(keepends=True)
    if lines[-1][:-1].endswith('\\'):
        # Explicit backslash continuation
        return 'incomplete', find_last_indent(lines)

    try:
        for transform in self.cleanup_transforms:
            lines = transform(lines)
    except SyntaxError:
        return 'invalid', None

    if not lines:
        # NOTE(review): guards the lines[0] access below in case a cleanup
        # transform returns an empty list; there is then nothing to continue.
        return 'complete', None

    if lines[0].startswith('%%'):
        # Special case for cell magics - completion marked by blank line
        if lines[-1].strip():
            return 'incomplete', find_last_indent(lines)
        else:
            return 'complete', None

    try:
        for transform in self.line_transforms:
            lines = transform(lines)
        lines = self.do_token_transforms(lines)
    except SyntaxError:
        return 'invalid', None

    tokens_by_line = make_tokens_by_line(lines)
    if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
        # We're in a multiline string or expression
        return 'incomplete', find_last_indent(lines)

    # NOTE(review): make_tokens_by_line is defined elsewhere; the length
    # check avoids an IndexError if it ever yields a single token line.
    if len(tokens_by_line) >= 2:
        # Find the last token on the previous line that's not NEWLINE or COMMENT
        toks_last_line = tokens_by_line[-2]
        skipped_types = {tokenize.NEWLINE, tokenize.COMMENT}
        ix = len(toks_last_line) - 1
        while ix >= 0 and toks_last_line[ix].type in skipped_types:
            ix -= 1

        # ix < 0 means every token was skipped, so there is no candidate
        # token to inspect (and indexing with -1 would wrap around).
        if ix >= 0 and toks_last_line[ix].string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

    # If there's a blank line at the end, assume we're ready to execute.
    if not lines[-1].strip():
        return 'complete', None

    # At this point, our checks think the code is complete (or invalid).
    # We'll use codeop.compile_command to check this with the real parser.

    try:
        with warnings.catch_warnings():
            # Turn SyntaxWarning into an exception so it is reported as
            # 'invalid' by the handler below.
            warnings.simplefilter('error', SyntaxWarning)
            res = compile_command(''.join(lines), symbol='exec')
    except (SyntaxError, OverflowError, ValueError, TypeError,
            MemoryError, SyntaxWarning):
        return 'invalid', None
    else:
        # compile_command returns None when the source is a valid but
        # incomplete prefix.
        if res is None:
            return 'incomplete', find_last_indent(lines)
    return 'complete', None
552
558
553
559
def find_last_indent(lines):
    """Return the indentation width of the last line, in spaces.

    Only leading spaces and tabs count as indentation; each tab is counted
    as 4 spaces.

    Parameters
    ----------
    lines : list of str
      Lines of input. Only the last one is inspected.

    Returns
    -------
    int
      Width of the last line's leading whitespace, or 0 if the line is not
      indented or ``lines`` is empty.
    """
    if not lines:
        # No last line to measure; avoid an IndexError on lines[-1].
        return 0
    last = lines[-1]
    # Leading run of spaces/tabs, same as matching r'^[ \t]+'.
    indent = last[:len(last) - len(last.lstrip(' \t'))]
    return len(indent.replace('\t', ' ' * 4))
General Comments 0
You need to be logged in to leave comments. Login now