##// END OF EJS Templates
Add some logic to pass all of the check_complete tests...
Tony Fast -
Show More
@@ -1,652 +1,667 b''
1 """Input transformer machinery to support IPython special syntax.
1 """Input transformer machinery to support IPython special syntax.
2
2
3 This includes the machinery to recognise and transform ``%magic`` commands,
3 This includes the machinery to recognise and transform ``%magic`` commands,
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
4 ``!system`` commands, ``help?`` querying, prompt stripping, and so forth.
5
5
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
6 Added: IPython 7.0. Replaces inputsplitter and inputtransformer which were
7 deprecated in 7.0.
7 deprecated in 7.0.
8 """
8 """
9
9
10 # Copyright (c) IPython Development Team.
10 # Copyright (c) IPython Development Team.
11 # Distributed under the terms of the Modified BSD License.
11 # Distributed under the terms of the Modified BSD License.
12
12
13 from codeop import compile_command
13 from codeop import compile_command
14 import re
14 import re
15 import tokenize
15 import tokenize
16 from typing import List, Tuple
16 from typing import List, Tuple
17 import warnings
17 import warnings
18
18
# Matches the run of spaces and/or tabs at the very start of a line.
_indent_re = re.compile(r'^[ \t]+')


def leading_indent(lines):
    """Strip the first line's leading whitespace from every line of a cell.

    The run of spaces/tabs that opens the first line is taken as the indent
    to remove.  Each line that begins with exactly that prefix has it cut
    off; any other line is passed through untouched.

    If *lines* is empty, or the first line is not indented, the input list
    itself is returned unchanged.
    """
    if not lines:
        return lines
    match = _indent_re.match(lines[0])
    if match is None:
        return lines
    prefix = match.group(0)
    width = len(prefix)
    dedented = []
    for line in lines:
        if line.startswith(prefix):
            line = line[width:]
        dedented.append(line)
    return dedented
36
36
class PromptStripper:
    """Callable that removes matching input prompts from a block of input.

    Parameters
    ----------
    prompt_re : regular expression
        A regular expression matching any input prompt (including
        continuation, e.g. ``...``)
    initial_re : regular expression, optional
        A regular expression matching only the initial prompt, but not
        continuation.  If no initial expression is given, prompt_re will be
        used everywhere.  Used mainly for plain Python prompts (``>>>``),
        where the continuation prompt ``...`` is a valid Python expression in
        Python 3, so shouldn't be stripped.

    Notes
    -----
    Only *initial_re* is tested against the first line and only *prompt_re*
    against the second line.  If either matches, one leading prompt is
    removed from every line of the block; otherwise the block is returned
    as is.
    """

    def __init__(self, prompt_re, initial_re=None):
        self.prompt_re = prompt_re
        # Fall back to the general prompt pattern when no dedicated
        # first-line pattern is supplied.
        self.initial_re = initial_re or prompt_re

    def _strip(self, lines):
        """Return *lines* with at most one prompt match removed per line."""
        remove_prompt = self.prompt_re.sub
        return [remove_prompt('', line, count=1) for line in lines]

    def __call__(self, lines):
        """Strip prompts from *lines* if the first or second line has one."""
        if not lines:
            return lines
        if self.initial_re.match(lines[0]):
            return self._strip(lines)
        if len(lines) > 1 and self.prompt_re.match(lines[1]):
            return self._strip(lines)
        return lines
70
70
# Plain Python prompts.  A dedicated first-line pattern accepts only ``>>>``,
# so a leading ``...`` on the first line is not treated as a prompt.
classic_prompt = PromptStripper(
    re.compile(r'^(>>>|\.\.\.)( |$)'),
    re.compile(r'^>>>( |$)'),
)

# IPython prompts: ``In [N]: `` and the dotted continuation prompt.  The same
# pattern is used for the first line and for the rest of the block.
ipython_prompt = PromptStripper(
    prompt_re=re.compile(r'^(In \[\d+\]: |\s*\.{3,}: ?)'),
)
77
77
def cell_magic(lines):
    """Rewrite a ``%%magic`` cell as a single ``run_cell_magic`` call.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell.

    Returns
    -------
    list of str
        *lines* unchanged when the cell is empty, does not start with ``%%``,
        or its first line looks like ``%%name?`` (left for the help
        transformer).  Otherwise a one-element list holding the call
        ``get_ipython().run_cell_magic(name, first_line, body)``, where
        *first_line* is the rest of the ``%%`` line after the first space and
        *body* is all remaining lines joined together.
    """
    if not lines or not lines[0].startswith('%%'):
        return lines
    # Raw string: '\w' and '\?' are invalid escapes in a normal literal.
    if re.match(r'%%\w+\?', lines[0]):
        # This case will be handled by help_end
        return lines
    # Remove only the line terminator.  Blindly slicing off the last
    # character would eat a real character when the line has no newline.
    header = lines[0][2:].rstrip('\r\n')
    magic_name, _, first_line = header.partition(' ')
    body = ''.join(lines[1:])
    return ['get_ipython().run_cell_magic(%r, %r, %r)\n'
            % (magic_name, first_line, body)]
88
88
89
89
def _find_assign_op(token_line):
    """Get the index of the first assignment in the line ('=' not inside brackets)

    Parameters
    ----------
    token_line : sequence
        Tokens of one logical line; only each token's ``.string`` is read.

    Returns
    -------
    int or None
        Index of the first ``=`` token found at bracket depth zero, or None
        if there is no such token.

    Note: We don't try to support multiple special assignment (a = b = %foo)
    """
    paren_level = 0
    for i, ti in enumerate(token_line):
        s = ti.string
        if s == '=' and paren_level == 0:
            return i
        # Compare against whole tokens.  A substring test such as
        # ``s in '([{'`` is also true for the empty string, so tokens with an
        # empty ``.string`` would be counted as open brackets.
        if s in {'(', '[', '{'}:
            paren_level += 1
        elif s in {')', ']', '}'}:
            # Ignore unbalanced closers rather than going negative.
            if paren_level > 0:
                paren_level -= 1
    return None
105
105
def find_end_of_continued_line(lines, start_line: int):
    """Find the last line of a line explicitly extended using backslashes.

    Starting at *start_line*, advance while the current line ends with a
    backslash immediately followed by a newline.

    Uses 0-indexed line numbers.

    Returns
    -------
    int
        Index of the last physical line of the continued line.  The result
        is always a valid index into *lines*: if the final line of *lines*
        itself ends with a backslash, that final line's index is returned.

    Raises
    ------
    IndexError
        If *start_line* is not a valid index into *lines*.
    """
    end_line = start_line
    last_line = len(lines) - 1
    # The index test keeps end_line from running past the last line when the
    # input ends in an unterminated continuation.
    while lines[end_line].endswith('\\\n') and end_line < last_line:
        end_line += 1
    return end_line
117
117
def assemble_continued_line(lines, start: Tuple[int, int], end_line: int):
    r"""Assemble a single line from multiple continued line pieces

    Continued lines are lines ending in ``\``, and the line following the last
    ``\`` in the block.

    For example, this code continues over multiple lines::

        if (assign_ix is not None) \
             and (len(line) >= assign_ix + 2) \
             and (line[assign_ix+1].string == '%') \
             and (line[assign_ix+2].type == tokenize.NAME):

    This statement contains four continued line pieces.
    Assembling these pieces into a single line would give::

        if (assign_ix is not None) and (len(line) >= assign_ix + 2) and (line[...

    This uses 0-indexed line numbers. *start* is (lineno, colno).

    Used to allow ``%magic`` and ``!system`` commands to be continued over
    multiple lines.

    Parameters
    ----------
    lines : list of str
        Physical lines of the cell.
    start : (int, int)
        Line index and column at which the assembled text begins.
    end_line : int
        Index of the last physical line to include.

    Returns
    -------
    str
        The pieces joined with single spaces.  The final two characters
        (backslash and newline) are removed from every piece but the last;
        a trailing newline, if present, is removed from the last piece.
    """
    start_line, start_col = start[0], start[1]
    parts = [lines[start_line][start_col:]] + lines[start_line + 1:end_line + 1]
    # Strip backslash+newline
    pieces = [p[:-2] for p in parts[:-1]]
    last = parts[-1]
    # Strip the newline from the last line only when there is one, so input
    # without a final newline does not lose its last character.
    if last.endswith('\n'):
        last = last[:-1]
    pieces.append(last)
    return ' '.join(pieces)
144
144
class TokenTransformBase:
    """Base class for transformations which examine tokens.

    Special syntax should not be transformed when it occurs inside strings or
    comments. This is hard to reliably avoid with regexes. The solution is to
    tokenise the code as Python, and recognise the special syntax in the
    tokens.

    IPython's special syntax is not valid Python syntax, so tokenising may go
    wrong after the special syntax starts. These classes therefore find and
    transform *one* instance of special syntax at a time into regular Python
    syntax. After each transformation, tokens are regenerated to find the
    next piece of special syntax.

    Subclasses need to implement one class method (find)
    and one regular method (transform).

    The priority attribute can select which transformation to apply if
    multiple transformers match in the same place. Lower numbers have higher
    priority. This allows "%magic?" to be turned into a help call rather than
    a magic call.
    """
    # Lower numbers -> higher priority (for matches in the same location)
    priority = 10

    def __init__(self, start):
        """Record where the special syntax begins.

        *start* is a ``(lineno, colno)`` pair with a 1-indexed line number;
        it is stored as a 0-indexed ``start_line`` plus ``start_col``.
        """
        lineno = start[0]
        self.start_line = lineno - 1  # Shift from 1-index to 0-index
        self.start_col = start[1]

    def sortby(self):
        """Return the sort key ``(start_line, start_col, priority)``."""
        key = (self.start_line, self.start_col, self.priority)
        return key

    @classmethod
    def find(cls, tokens_by_line):
        """Find one instance of special syntax in the provided tokens.

        Tokens are grouped into logical lines for convenience,
        so it is easy to e.g. look at the first token of each line.
        *tokens_by_line* is a list of lists of tokenize.TokenInfo objects.

        This should return an instance of its class, pointing to the start
        position it has found, or None if it found no match.
        """
        raise NotImplementedError

    def transform(self, lines: List[str]):
        """Transform one instance of special syntax found by ``find()``

        Takes a list of strings representing physical lines,
        returns a similar list of transformed lines.
        """
        raise NotImplementedError
195
195
class MagicAssign(TokenTransformBase):
    """Transformer for assignments from magics (a = %foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first magic assignment (a = %foo) in the cell.

        Returns an instance pointing at the ``%`` token, or None when no
        logical line has ``=`` directly followed by ``%`` and a NAME token.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            # Two tokens after '=' are inspected, so the line must hold at
            # least assign_ix + 3 tokens for both index lookups to be safe.
            if (assign_ix is not None) \
                    and (len(line) > assign_ix + 2) \
                    and (line[assign_ix+1].string == '%') \
                    and (line[assign_ix+2].type == tokenize.NAME):
                return cls(line[assign_ix+1].start)
        return None

    def transform(self, lines: List[str]):
        """Transform a magic assignment found by the ``find()`` classmethod.

        The text from the ``%`` to the end of the (possibly backslash
        continued) line is replaced by a ``get_ipython().run_line_magic``
        call; the text before the ``%`` is kept as the left-hand side.
        Returns a new list of physical lines.
        """
        start_line, start_col = self.start_line, self.start_col
        lhs = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        rhs = assemble_continued_line(lines, (start_line, start_col), end_line)
        assert rhs.startswith('%'), rhs
        # First space separates the magic name from its argument string.
        magic_name, _, args = rhs[1:].partition(' ')

        lines_before = lines[:start_line]
        call = "get_ipython().run_line_magic({!r}, {!r})".format(magic_name, args)
        new_line = lhs + call + '\n'
        lines_after = lines[end_line+1:]

        return lines_before + [new_line] + lines_after
226
226
227
227
class SystemAssign(TokenTransformBase):
    """Transformer for assignments from system commands (a = !foo)"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first system assignment (a = !foo) in the cell.

        After the ``=`` token, the run of ERRORTOKEN tokens is scanned:
        whitespace tokens are skipped, a ``!`` yields a match at that token,
        and anything else ends the scan for that line.
        """
        for line in tokens_by_line:
            assign_ix = _find_assign_op(line)
            if assign_ix is None or len(line) < assign_ix + 2:
                continue
            for tok in line[assign_ix + 1:]:
                if tok.type != tokenize.ERRORTOKEN:
                    break
                if tok.string == '!':
                    return cls(tok.start)
                if not tok.string.isspace():
                    break
        return None

    def transform(self, lines: List[str]):
        """Transform a system assignment found by the ``find()`` classmethod.

        The text from the ``!`` to the end of the (possibly backslash
        continued) line is replaced by a ``get_ipython().getoutput`` call;
        the text before the ``!`` is kept as the left-hand side.
        Returns a new list of physical lines.
        """
        first, col = self.start_line, self.start_col

        lhs = lines[first][:col]
        last = find_end_of_continued_line(lines, first)
        rhs = assemble_continued_line(lines, (first, col), last)
        # Drop the leading escape character to get the shell command.
        cmd = rhs[1:]

        call = "get_ipython().getoutput({!r})".format(cmd)
        replaced = [lhs + call + '\n']

        return lines[:first] + replaced + lines[last + 1:]
265
265
266 # The escape sequences that define the syntax transformations IPython will
266 # The escape sequences that define the syntax transformations IPython will
267 # apply to user input. These can NOT be just changed here: many regular
267 # apply to user input. These can NOT be just changed here: many regular
268 # expressions and other parts of the code may use their hardcoded values, and
268 # expressions and other parts of the code may use their hardcoded values, and
269 # for all intents and purposes they constitute the 'IPython syntax', so they
269 # for all intents and purposes they constitute the 'IPython syntax', so they
270 # should be considered fixed.
270 # should be considered fixed.
271
271
ESC_SHELL = '!'     # Send line to underlying system shell
ESC_SH_CAP = '!!'   # Send line to system shell and capture output
ESC_HELP = '?'      # Find information about object
ESC_HELP2 = '??'    # Find extra-detailed information about object
ESC_MAGIC = '%'     # Call magic function
ESC_MAGIC2 = '%%'   # Call cell-magic function
ESC_QUOTE = ','     # Split args on whitespace, quote each as string and call
ESC_QUOTE2 = ';'    # Quote all args as a single string, call
ESC_PAREN = '/'     # Call first argument with rest of line as arguments

# Every single-character escape.
ESCAPE_SINGLES = {
    ESC_SHELL, ESC_HELP, ESC_MAGIC, ESC_QUOTE, ESC_QUOTE2, ESC_PAREN,
}
# Two-character escapes; %% (cell magic) is handled separately
ESCAPE_DOUBLES = {ESC_SH_CAP, ESC_HELP2}
284
284
def _make_help_call(target, esc, next_input=None):
    """Prepares a pinfo(2)/psearch call from a target name and the escape
    (i.e. ? or ??)

    The magic used is ``pinfo2`` when *esc* is ``'??'``, otherwise
    ``psearch`` when *target* contains ``*``, otherwise ``pinfo``.  The
    result is the source text of a ``get_ipython().run_line_magic`` call with
    *target* as the argument string.  When *next_input* is given, the text is
    prefixed with a ``get_ipython().set_next_input(next_input)`` call.
    """
    if esc == '??':
        method = 'pinfo2'
    elif '*' in target:
        method = 'psearch'
    else:
        method = 'pinfo'

    if next_input is None:
        return 'get_ipython().run_line_magic(%r, %r)' % (method, target)
    return 'get_ipython().set_next_input(%r);get_ipython().run_line_magic(%r, %r)' % \
        (next_input, method, target)
300
300
def _tr_help(content):
    """Translate lines escaped with: ?

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if content:
        return _make_help_call(content, '?')
    return 'get_ipython().show_usage()'
310
310
def _tr_help2(content):
    """Translate lines escaped with: ??

    A naked help line should fire the intro help screen (shell.show_usage())
    """
    if content:
        return _make_help_call(content, '??')
    return 'get_ipython().show_usage()'
320
320
def _tr_magic(content):
    """Translate lines escaped with a percent sign: %

    The text before the first space is the magic name; everything after it
    is passed along as the argument string.
    """
    magic_name, _sep, magic_args = content.partition(' ')
    return 'get_ipython().run_line_magic(%r, %r)' % (magic_name, magic_args)
325
325
def _tr_quote(content):
    """Translate lines escaped with a comma: ,

    The first word is the callable; the remaining whitespace-separated words
    each become a double-quoted string argument.
    """
    func, _sep, rest = content.partition(' ')
    quoted_args = '", "'.join(rest.split())
    return '%s("%s")' % (func, quoted_args)
330
330
def _tr_quote2(content):
    """Translate lines escaped with a semicolon: ;

    The first word is the callable; the rest of the line becomes a single
    double-quoted string argument.
    """
    func, _sep, rest = content.partition(' ')
    return '%s("%s")' % (func, rest)
335
335
def _tr_paren(content):
    """Translate lines escaped with a slash: /

    The first word is the callable; the remaining whitespace-separated words
    become its comma-separated arguments.
    """
    func, _sep, rest = content.partition(' ')
    arg_list = ", ".join(rest.split())
    return '%s(%s)' % (func, arg_list)
340
340
# Maps each escape prefix to a callable that takes the text following the
# escape and returns the Python source that replaces the escaped line.
tr = {
    ESC_SHELL: 'get_ipython().system({!r})'.format,
    ESC_SH_CAP: 'get_ipython().getoutput({!r})'.format,
    ESC_HELP: _tr_help,
    ESC_HELP2: _tr_help2,
    ESC_MAGIC: _tr_magic,
    ESC_QUOTE: _tr_quote,
    ESC_QUOTE2: _tr_quote2,
    ESC_PAREN: _tr_paren,
}
349
349
class EscapedCommand(TokenTransformBase):
    """Transformer for escaped commands like %foo, !foo, or /foo"""
    @classmethod
    def find(cls, tokens_by_line):
        """Find the first escaped command (%foo, !foo, etc.) in the cell.

        Only the first token of each logical line that is not an
        INDENT/DEDENT is examined.  Returns an instance pointing at that
        token when it is one of ``ESCAPE_SINGLES``, or None if no line
        qualifies.
        """
        indent_types = {tokenize.INDENT, tokenize.DEDENT}
        for line in tokens_by_line:
            ix = 0
            n_tokens = len(line)
            # Bounded scan: a line that is empty or holds nothing but
            # INDENT/DEDENT tokens must not index past its end.
            while ix < n_tokens and line[ix].type in indent_types:
                ix += 1
            if ix >= n_tokens:
                continue
            if line[ix].string in ESCAPE_SINGLES:
                return cls(line[ix].start)
        return None

    def transform(self, lines):
        """Transform an escaped line found by the ``find()`` classmethod.

        The escaped command, including any backslash-continued lines, is
        replaced by one line: the original indentation followed by the call
        produced by the ``tr`` entry for the escape.  Returns a new list of
        physical lines.
        """
        start_line, start_col = self.start_line, self.start_col

        indent = lines[start_line][:start_col]
        end_line = find_end_of_continued_line(lines, start_line)
        line = assemble_continued_line(lines, (start_line, start_col), end_line)

        # Slicing never raises, so no length check is needed before testing
        # for a two-character escape.
        if line[:2] in ESCAPE_DOUBLES:
            escape, content = line[:2], line[2:]
        else:
            escape, content = line[:1], line[1:]

        if escape in tr:
            call = tr[escape](content)
        else:
            # No handler for this prefix (for instance an empty assembled
            # line): emit an empty statement instead of failing.
            call = ''

        lines_before = lines[:start_line]
        new_line = indent + call + '\n'
        lines_after = lines[end_line + 1:]

        return lines_before + [new_line] + lines_after
383
387
# Matches a (possibly dotted, possibly %-prefixed) name followed by ? or ??
# at the end of the text.  Group 1 is the name, group 3 the ?/?? escape.
_help_end_re = re.compile(
    r"""(%{0,2}
        [a-zA-Z_*][\w*]*        # Variable name
        (\.[a-zA-Z_*][\w*]*)*   # .etc.etc
        )
        (\?\??)$                # ? or ??
        """,
    flags=re.VERBOSE,
)
391
395
class HelpEnd(TokenTransformBase):
    """Transformer for help syntax: obj? and obj??"""
    # This needs to be higher priority (lower number) than EscapedCommand so
    # that inspecting magics (%foo?) works.
    priority = 5

    def __init__(self, start, q_locn):
        """Record the start of the line and the location of the ``?``.

        Both *start* and *q_locn* are ``(lineno, colno)`` pairs with
        1-indexed line numbers; they are stored 0-indexed.
        """
        super().__init__(start)
        self.q_line = q_locn[0] - 1  # Shift from 1-indexed to 0-indexed
        self.q_col = q_locn[1]

    @classmethod
    def find(cls, tokens_by_line):
        """Find the first help command (foo?) in the cell.

        A logical line matches when its last-but-one token is ``?``.
        Returns an instance, or None if no line matches.
        """
        for line in tokens_by_line:
            # Last token is NEWLINE; look at last but one
            if len(line) > 2 and line[-2].string == '?':
                # Find the first token that's not INDENT/DEDENT.  The scan
                # cannot run off the end: it stops at the '?' at the latest.
                ix = 0
                while line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                    ix += 1
                return cls(line[ix].start, line[-2].start)
        return None

    def transform(self, lines):
        """Transform a help command found by the ``find()`` classmethod.

        Returns a new list of physical lines in which the lines from the
        start of the command through the line holding the ``?`` are replaced
        by a single help call.

        Raises
        ------
        SyntaxError
            If the text does not end in a name followed by ``?`` or ``??``.
        """
        piece = ''.join(lines[self.start_line:self.q_line+1])
        indent, content = piece[:self.start_col], piece[self.start_col:]
        lines_before = lines[:self.start_line]
        lines_after = lines[self.q_line + 1:]

        m = _help_end_re.search(content)
        if not m:
            raise SyntaxError(content)
        target = m.group(1)
        esc = m.group(3)

        # If we're mid-command, put it back on the next prompt for the user.
        next_input = None
        if (not lines_before) and (not lines_after) \
                and content.strip() != m.group(0):
            next_input = content.rstrip('?\n')

        call = _make_help_call(target, esc, next_input=next_input)
        new_line = indent + call + '\n'

        return lines_before + [new_line] + lines_after
441
445
def make_tokens_by_line(lines):
    """Tokenize a series of lines and group tokens by line.

    The tokens for a multiline Python string or expression are
    grouped as one line.

    Returns a list of lists of tokens.  If tokenizing stops with a
    ``tokenize.TokenError``, the tokens collected up to that point are
    returned.
    """
    # NL tokens are used inside multiline expressions, but also after blank
    # lines or comments. This is intentional - see https://bugs.python.org/issue17061
    # We want to group the former case together but split the latter, so we
    # track parentheses level, similar to the internals of tokenize.
    openers = {'(', '[', '{'}
    closers = {')', ']', '}'}
    grouped = [[]]
    depth = 0
    try:
        for tok in tokenize.generate_tokens(iter(lines).__next__):
            grouped[-1].append(tok)
            ends_logical_line = (
                tok.type == tokenize.NEWLINE
                or (tok.type == tokenize.NL and depth <= 0)
            )
            if ends_logical_line:
                grouped.append([])
            elif tok.string in openers:
                depth += 1
            elif tok.string in closers and depth > 0:
                depth -= 1
    except tokenize.TokenError:
        # Input ended in a multiline string or expression. That's OK for us.
        pass
    # Drop the trailing empty group left behind by the final line break.
    if not grouped[-1]:
        grouped.pop()

    return grouped
473
477
def show_linewise_tokens(s: str):
    """For investigation and debugging

    Prints the tokens of ``s`` grouped the way ``make_tokens_by_line()``
    groups them: a ``Line -------`` header, then one token per row.

    Parameters
    ----------
    s : str
        Source code to tokenize. A trailing newline is appended if missing.
    """
    if not s.endswith('\n'):
        s += '\n'
    lines = s.splitlines(keepends=True)
    for line in make_tokens_by_line(lines):
        print("Line -------")
        for tokinfo in line:
            print(" ", tokinfo)
483
487
# Arbitrary limit to prevent getting stuck in infinite loops.
# TransformerManager.do_token_transforms() makes at most this many passes
# before raising RuntimeError.
TRANSFORM_LOOP_LIMIT = 500
486
490
class TransformerManager:
    """Applies various transformations to a cell or code block.

    The key methods for external use are ``transform_cell()``
    and ``check_complete()``.
    """
    def __init__(self):
        # Callables taking and returning a list of lines; run first.
        self.cleanup_transforms = [
            leading_indent,
            classic_prompt,
            ipython_prompt,
        ]
        # Callables taking and returning a list of lines; run after cleanup.
        self.line_transforms = [
            cell_magic,
        ]
        # Classes whose ``find(tokens_by_line)`` returns a transformer
        # object (or a false value when nothing matches).
        self.token_transformers = [
            MagicAssign,
            SystemAssign,
            EscapedCommand,
            HelpEnd,
        ]

    def do_one_token_transform(self, lines):
        """Find and run the transform earliest in the code.

        Returns (changed, lines).

        This method is called repeatedly until changed is False, indicating
        that all available transformations are complete.

        The tokens following IPython special syntax might not be valid, so
        the transformed code is retokenised every time to identify the next
        piece of special syntax. Hopefully long code cells are mostly valid
        Python, not using lots of IPython special syntax, so this shouldn't be
        a performance issue.
        """
        tokens_by_line = make_tokens_by_line(lines)
        candidates = []
        for transformer_cls in self.token_transformers:
            transformer = transformer_cls.find(tokens_by_line)
            if transformer:
                candidates.append(transformer)

        if not candidates:
            # Nothing to transform
            return False, lines

        ordered_transformers = sorted(candidates, key=TokenTransformBase.sortby)
        for transformer in ordered_transformers:
            try:
                return True, transformer.transform(lines)
            except SyntaxError:
                # This candidate could not be applied; try the next one.
                pass
        return False, lines

    def do_token_transforms(self, lines):
        """Apply token transforms until none applies any more.

        Returns the transformed lines. Raises RuntimeError if the input is
        still changing after ``TRANSFORM_LOOP_LIMIT`` passes.
        """
        for _ in range(TRANSFORM_LOOP_LIMIT):
            changed, lines = self.do_one_token_transform(lines)
            if not changed:
                return lines

        raise RuntimeError("Input transformation still changing after "
                           "%d iterations. Aborting." % TRANSFORM_LOOP_LIMIT)

    def transform_cell(self, cell: str) -> str:
        """Transforms a cell of input code"""
        if not cell.endswith('\n'):
            cell += '\n'  # Ensure the cell has a trailing newline
        lines = cell.splitlines(keepends=True)
        for transform in self.cleanup_transforms + self.line_transforms:
            lines = transform(lines)

        lines = self.do_token_transforms(lines)
        return ''.join(lines)

    def check_complete(self, cell: str):
        """Return whether a block of code is ready to execute, or should be continued

        Parameters
        ----------
        cell : string
          Python input code, which can be multiline.

        Returns
        -------
        status : str
          One of 'complete', 'incomplete', or 'invalid' if source is not a
          prefix of valid code.
        indent_spaces : int or None
          The number of spaces by which to indent the next line of code. If
          status is not 'incomplete', this is None.
        """
        lines = cell.splitlines(keepends=True)
        if not lines:
            # Empty input: nothing left to type.
            return 'complete', None

        if lines[-1].endswith('\\'):
            # Explicit backslash continuation
            return 'incomplete', find_last_indent(lines)

        try:
            for transform in self.cleanup_transforms:
                lines = transform(lines)
        except SyntaxError:
            return 'invalid', None

        if lines[0].startswith('%%'):
            # Special case for cell magics - completion marked by blank line
            if lines[-1].strip():
                return 'incomplete', find_last_indent(lines)
            else:
                return 'complete', None

        try:
            for transform in self.line_transforms:
                lines = transform(lines)
            lines = self.do_token_transforms(lines)
        except SyntaxError:
            return 'invalid', None

        tokens_by_line = make_tokens_by_line(lines)
        if not tokens_by_line:
            return 'incomplete', find_last_indent(lines)

        if tokens_by_line[-1][-1].type != tokenize.ENDMARKER:
            # We're in a multiline string or expression
            return 'incomplete', find_last_indent(lines)

        if len(tokens_by_line[-1]) == 1:
            # The final token group holds nothing but ENDMARKER.
            return 'incomplete', find_last_indent(lines)

        # The final group ends with ENDMARKER (checked above) and has at
        # least two tokens, so [-2] is the token just before ENDMARKER.
        # NOTE(review): trailing NEWLINE/COMMENT tokens are not skipped here;
        # the loop that was meant to skip them could never run and has been
        # dropped without changing behaviour.
        toks_last_line = tokens_by_line[-1]
        last_tok = toks_last_line[-2]

        if last_tok.string == ':':
            # The last line starts a block (e.g. 'if foo:')
            ix = 0
            while toks_last_line[ix].type in {tokenize.INDENT, tokenize.DEDENT}:
                ix += 1
            indent = toks_last_line[ix].start[1]
            return 'incomplete', indent + 4

        # A backslash token whose physical line does not end with the
        # backslash is rejected as invalid.
        if last_tok.string == '\\':
            if not last_tok.line.endswith('\\'):
                return 'invalid', None

        # If there's a blank line at the end, assume we're ready to execute
        if not lines[-1].strip():
            return 'complete', None

        # At this point, our checks think the code is complete (or invalid).
        # We'll use codeop.compile_command to check this with the real parser
        try:
            with warnings.catch_warnings():
                warnings.simplefilter('error', SyntaxWarning)
                res = compile_command(''.join(lines), symbol='exec')
        except (SyntaxError, OverflowError, ValueError, TypeError,
                MemoryError, SyntaxWarning):
            return 'invalid', None
        else:
            if res is None:
                # compile_command returns None for an incomplete statement.
                return 'incomplete', find_last_indent(lines)

        # Input that closes an indented block but has no trailing newline
        # is treated as still being typed.
        if toks_last_line[-2].type == tokenize.DEDENT:
            if not lines[-1].endswith('\n'):
                return 'incomplete', find_last_indent(lines)

        return 'complete', None
646
661
647
662
def find_last_indent(lines):
    """Return the indentation width of the last line in ``lines``.

    Parameters
    ----------
    lines : list of str
        Source lines; only the last one is inspected.

    Returns
    -------
    int
        Length of the last line's leading run of spaces and tabs, with each
        tab counted as four spaces. 0 if ``lines`` is empty or the last
        line has no leading whitespace.
    """
    if not lines:
        # No line to measure; avoids an IndexError on lines[-1].
        return 0
    m = _indent_re.match(lines[-1])
    if not m:
        return 0
    return len(m.group(0).replace('\t', ' '*4))
General Comments 0
You need to be logged in to leave comments. Login now